Merge tag 'misc-habanalabs-next-2022-09-21' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

"This tag contains habanalabs driver changes for v6.1:

- Support new notifier event for device state change through eventfd.
- Add uAPI to retrieve device attestation information for Gaudi2.
- Add uAPI to retrieve the h/w status of all h/w blocks.
- Add uAPI to control the running mode of the engine cores in Gaudi2.
- Expose whether the device runs with secured firmware through the INFO
  ioctl and sysfs.
- Support trace events in DMA allocations and MMU map/unmap operations.
- Notify firmware when the device was acquired by a user process and
  when it was released. This is done as part of the RAS that the f/w
  performs.
- Multiple bug fixes, refactors and renames.
- Cleanup of error messages, moving some to debug level.
- Enhance log prints in case of h/w error events for Gaudi2."

* tag 'misc-habanalabs-next-2022-09-21' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (68 commits)
  habanalabs: eliminate aggregate use warning
  habanalabs/gaudi: use 8KB aligned address for TPC kernels
  habanalabs: remove some f/w descriptor validations
  habanalabs: build ASICs from new to old
  habanalabs/gaudi2: allow user to flush PCIE by read
  habanalabs: failure to open device due to reset is debug level
  habanalabs/gaudi2: Remove unnecessary (void*) conversions
  habanalabs/gaudi2: add secured attestation info uapi
  habanalabs/gaudi2: add handling to pmmu events in eqe handler
  habanalabs/gaudi: change TPC Assert to use TPC DEC instead of QMAN err
  habanalabs: rename error info structure
  habanalabs/gaudi2: get f/w reset status register dynamically
  habanalabs/gaudi2: increase hard-reset sleep time to 2 sec
  habanalabs/gaudi2: print RAZWI info upon PCIe access error
  habanalabs: MMU invalidation h/w is per device
  habanalabs: new notifier events for device state
  habanalabs/gaudi2: free event irq if init fails
  habanalabs: fix resetting the DRAM BAR
  habanalabs: add support for new cpucp return codes
  habanalabs/gaudi2: read F/W security indication after hard reset
  ...
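The device-state notifier mentioned above builds on the eventfd mechanism the driver already exposes through the INFO ioctl. A minimal userspace sketch of registering for and waiting on such notifications follows; HL_INFO_REGISTER_EVENTFD and the hl_info_args layout are taken from include/uapi/misc/habanalabs.h, but treat the exact names as assumptions to be checked against the header you build with:

    /* eventfd_notify.c - hedged sketch, not part of this merge */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/eventfd.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <misc/habanalabs.h>

    int main(void)
    {
        int dev_fd = open("/dev/hl0", O_RDWR);   /* assumes device index 0 */
        int efd = eventfd(0, 0);
        struct hl_info_args args;
        uint64_t cnt;

        if (dev_fd < 0 || efd < 0)
            return 1;

        memset(&args, 0, sizeof(args));
        args.op = HL_INFO_REGISTER_EVENTFD;      /* assumption: opcode name */
        args.eventfd = efd;
        if (ioctl(dev_fd, HL_IOCTL_INFO, &args))
            return 1;

        /* Blocks until the driver signals an event such as
         * HL_NOTIFIER_EVENT_DEVICE_RESET or HL_NOTIFIER_EVENT_CS_TIMEOUT.
         */
        read(efd, &cnt, sizeof(cnt));
        printf("received %llu notification(s)\n", (unsigned long long)cnt);
        return 0;
    }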
diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs
@@ -16,7 +16,7 @@ Description:    Version of the application running on the device's CPU

 What:           /sys/class/habanalabs/hl<n>/clk_max_freq_mhz
 Date:           Jun 2019
-KernelVersion:  not yet upstreamed
+KernelVersion:  5.7
 Contact:        ogabbay@kernel.org
 Description:    Allows the user to set the maximum clock frequency, in MHz.
                 The device clock might be set to lower value than the maximum.
@@ -26,7 +26,7 @@ Description:    Allows the user to set the maximum clock frequency, in MHz.

 What:           /sys/class/habanalabs/hl<n>/clk_cur_freq_mhz
 Date:           Jun 2019
-KernelVersion:  not yet upstreamed
+KernelVersion:  5.7
 Contact:        ogabbay@kernel.org
 Description:    Displays the current frequency, in MHz, of the device clock.
                 This property is valid only for the Gaudi ASIC family
@@ -176,6 +176,12 @@ KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Version of the device's preboot F/W code

+What:           /sys/class/habanalabs/hl<n>/security_enabled
+Date:           Oct 2022
+KernelVersion:  6.1
+Contact:        obitton@habana.ai
+Description:    Displays the device's security status
+
 What:           /sys/class/habanalabs/hl<n>/soft_reset
 Date:           Jan 2019
 KernelVersion:  5.1
@@ -230,6 +236,6 @@ Description:    Version of the u-boot running on the device's CPU

 What:           /sys/class/habanalabs/hl<n>/vrm_ver
 Date:           Jan 2022
-KernelVersion:  not yet upstreamed
+KernelVersion:  5.17
 Contact:        ogabbay@kernel.org
 Description:    Version of the Device's Voltage Regulator Monitor F/W code. N/A to GOYA and GAUDI
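The new security_enabled node, like the rest of the hl<n> attributes, is a plain-text sysfs file, so checking it needs no ioctl plumbing. A minimal sketch, assuming device index 0:

    /* read_security.c - hedged sketch */
    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/sys/class/habanalabs/hl0/security_enabled", "r");
        char buf[8] = "";

        if (!f)
            return 1;
        fgets(buf, sizeof(buf), f);
        fclose(f);
        printf("secured firmware: %s", buf);
        return 0;
    }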
diff --git a/MAINTAINERS b/MAINTAINERS
@@ -8878,6 +8878,7 @@ T:	git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git
 F:	Documentation/ABI/testing/debugfs-driver-habanalabs
 F:	Documentation/ABI/testing/sysfs-driver-habanalabs
 F:	drivers/misc/habanalabs/
+F:	include/trace/events/habanalabs.h
 F:	include/uapi/misc/habanalabs.h

 HACKRF MEDIA DRIVER
diff --git a/drivers/misc/habanalabs/Kconfig b/drivers/misc/habanalabs/Kconfig
@@ -10,6 +10,7 @@ config HABANA_AI
	select HWMON
	select DMA_SHARED_BUFFER
	select CRC32
+	select FW_LOADER
	help
	  Enables PCIe card driver for Habana's AI Processors (AIP) that are
	  designed to accelerate Deep Learning inference and training workloads.
diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
@@ -8,13 +8,13 @@ obj-$(CONFIG_HABANA_AI) := habanalabs.o
 include $(src)/common/Makefile
 habanalabs-y += $(HL_COMMON_FILES)

-include $(src)/goya/Makefile
-habanalabs-y += $(HL_GOYA_FILES)
+include $(src)/gaudi2/Makefile
+habanalabs-y += $(HL_GAUDI2_FILES)

 include $(src)/gaudi/Makefile
 habanalabs-y += $(HL_GAUDI_FILES)

-include $(src)/gaudi2/Makefile
-habanalabs-y += $(HL_GAUDI2_FILES)
+include $(src)/goya/Makefile
+habanalabs-y += $(HL_GOYA_FILES)

 habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
@@ -12,20 +12,18 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>

+#define CB_VA_POOL_SIZE		(4UL * SZ_1G)
+
 static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
 {
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	struct hl_vm_va_block *va_block, *tmp;
-	dma_addr_t bus_addr;
-	u64 virt_addr;
	u32 page_size = prop->pmmu.page_size;
-	s32 offset;
	int rc;

	if (!hdev->supports_cb_mapping) {
		dev_err_ratelimited(hdev->dev,
-				"Cannot map CB because no VA range is allocated for CB mapping\n");
+				"Mapping a CB to the device's MMU is not supported\n");
		return -EINVAL;
	}

@@ -35,106 +33,45 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
		return -EINVAL;
	}

-	INIT_LIST_HEAD(&cb->va_block_list);
	if (cb->is_mmu_mapped)
		return 0;

-	for (bus_addr = cb->bus_address;
-			bus_addr < cb->bus_address + cb->size;
-			bus_addr += page_size) {
-
-		virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size);
-		if (!virt_addr) {
-			dev_err(hdev->dev,
-				"Failed to allocate device virtual address for CB\n");
-			rc = -ENOMEM;
-			goto err_va_pool_free;
-		}
+	cb->roundup_size = roundup(cb->size, page_size);

-		va_block = kzalloc(sizeof(*va_block), GFP_KERNEL);
-		if (!va_block) {
-			rc = -ENOMEM;
-			gen_pool_free(ctx->cb_va_pool, virt_addr, page_size);
-			goto err_va_pool_free;
-		}
-
-		va_block->start = virt_addr;
-		va_block->end = virt_addr + page_size - 1;
-		va_block->size = page_size;
-		list_add_tail(&va_block->node, &cb->va_block_list);
+	cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size);
+	if (!cb->virtual_addr) {
+		dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n");
+		return -ENOMEM;
	}

-	mutex_lock(&ctx->mmu_lock);
-
-	bus_addr = cb->bus_address;
-	offset = 0;
-	list_for_each_entry(va_block, &cb->va_block_list, node) {
-		rc = hl_mmu_map_page(ctx, va_block->start, bus_addr,
-				va_block->size, list_is_last(&va_block->node,
-						&cb->va_block_list));
-		if (rc) {
-			dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
-				va_block->start);
-			goto err_va_umap;
-		}
-
-		bus_addr += va_block->size;
-		offset += va_block->size;
+	mutex_lock(&hdev->mmu_lock);
+	rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
+		goto err_va_umap;
	}

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
-
-	mutex_unlock(&ctx->mmu_lock);
+	mutex_unlock(&hdev->mmu_lock);

	cb->is_mmu_mapped = true;
-
	return rc;

 err_va_umap:
-	list_for_each_entry(va_block, &cb->va_block_list, node) {
-		if (offset <= 0)
-			break;
-		hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
-				offset <= va_block->size);
-		offset -= va_block->size;
-	}
-
-	rc = hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
-
-	mutex_unlock(&ctx->mmu_lock);
-
-err_va_pool_free:
-	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
-		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
-		list_del(&va_block->node);
-		kfree(va_block);
-	}
-
+	mutex_unlock(&hdev->mmu_lock);
+	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
	return rc;
 }

 static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
 {
	struct hl_device *hdev = ctx->hdev;
-	struct hl_vm_va_block *va_block, *tmp;
-
-	mutex_lock(&ctx->mmu_lock);
-
-	list_for_each_entry(va_block, &cb->va_block_list, node)
-		if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
-				list_is_last(&va_block->node,
-						&cb->va_block_list)))
-			dev_warn_ratelimited(hdev->dev,
-					"Failed to unmap CB's va 0x%llx\n",
-					va_block->start);

+	mutex_lock(&hdev->mmu_lock);
+	hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
+	mutex_unlock(&hdev->mmu_lock);

-	mutex_unlock(&ctx->mmu_lock);
-
-	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
-		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
-		list_del(&va_block->node);
-		kfree(va_block);
-	}
+	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
 }

 static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
@@ -376,7 +313,6 @@ int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle)
 static int hl_cb_info(struct hl_mem_mgr *mmg,
			u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va)
 {
-	struct hl_vm_va_block *va_block;
	struct hl_cb *cb;
	int rc = 0;

@@ -388,9 +324,8 @@ static int hl_cb_info(struct hl_mem_mgr *mmg,
	}

	if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
-		va_block = list_first_entry(&cb->va_block_list, struct hl_vm_va_block, node);
-		if (va_block) {
-			*device_va = va_block->start;
+		if (cb->is_mmu_mapped) {
+			*device_va = cb->virtual_addr;
		} else {
			dev_err(mmg->dev, "CB is not mapped to the device's MMU\n");
			rc = -EINVAL;
@@ -566,16 +501,23 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx)
		return -ENOMEM;
	}

-	rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr,
-			prop->cb_va_end_addr - prop->cb_va_start_addr, -1);
+	ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
+					CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
+	if (!ctx->cb_va_pool_base) {
+		rc = -ENOMEM;
+		goto err_pool_destroy;
+	}
+	rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to VA gen pool for CB mapping\n");
-		goto err_pool_destroy;
+		goto err_unreserve_va_block;
	}

	return 0;

+err_unreserve_va_block:
+	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
 err_pool_destroy:
	gen_pool_destroy(ctx->cb_va_pool);

@@ -590,4 +532,5 @@ void hl_cb_va_pool_fini(struct hl_ctx *ctx)
		return;

	gen_pool_destroy(ctx->cb_va_pool);
+	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
 }
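The rewrite above trades the per-page va_block list for a single VA allocation of the rounded-up CB size and one hl_mmu_map_contiguous() call, which shrinks both the state kept per CB (virtual_addr plus roundup_size instead of a list) and the error-unwind paths. A standalone illustration of the roundup arithmetic, assuming a 4 KiB PMMU page size:

    #include <stdio.h>
    #include <stdint.h>

    /* Same semantics as the kernel's roundup(x, y) for positive y. */
    static uint64_t roundup_u64(uint64_t x, uint64_t y)
    {
        return ((x + y - 1) / y) * y;
    }

    int main(void)
    {
        uint64_t page_size = 4096;   /* assumption: PMMU page size */
        uint64_t cb_size = 10000;    /* arbitrary CB size */
        uint64_t roundup_size = roundup_u64(cb_size, page_size);

        /* Old scheme: one hl_mmu_map_page() per page; new: one call. */
        printf("roundup_size=%llu covers %llu pages with 1 map call\n",
               (unsigned long long)roundup_size,
               (unsigned long long)(roundup_size / page_size));
        return 0;
    }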
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
@@ -12,7 +12,9 @@
 #include <linux/slab.h>

 #define HL_CS_FLAGS_TYPE_MASK	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
-				HL_CS_FLAGS_COLLECTIVE_WAIT)
+			HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
+			HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND)


 #define MAX_TS_ITER_NUM 10

@@ -824,10 +826,10 @@ static void cs_timedout(struct work_struct *work)
	}

	/* Save only the first CS timeout parameters */
-	rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_enable, 1, 0);
+	rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
	if (rc) {
-		hdev->last_error.cs_timeout.timestamp = ktime_get();
-		hdev->last_error.cs_timeout.seq = cs->sequence;
+		hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
+		hdev->captured_err_info.cs_timeout.seq = cs->sequence;

		event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT |
				HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT;
@@ -1242,6 +1244,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
		return CS_RESERVE_SIGNALS;
	else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
		return CS_UNRESERVE_SIGNALS;
+	else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
+		return CS_TYPE_ENGINE_CORE;
	else
		return CS_TYPE_DEFAULT;
 }
@@ -1253,6 +1257,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
	u32 cs_type_flags, num_chunks;
	enum hl_device_status status;
	enum hl_cs_type cs_type;
+	bool is_sync_stream;

	if (!hl_device_operational(hdev, &status)) {
		return -EBUSY;
@@ -1276,9 +1281,10 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
	cs_type = hl_cs_get_cs_type(cs_type_flags);
	num_chunks = args->in.num_chunks_execute;

-	if (unlikely((cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
-			cs_type == CS_TYPE_COLLECTIVE_WAIT) &&
-			!hdev->supports_sync_stream)) {
+	is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
+			cs_type == CS_TYPE_COLLECTIVE_WAIT);
+
+	if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) {
		dev_err(hdev->dev, "Sync stream CS is not supported\n");
		return -EINVAL;
	}
@@ -1288,7 +1294,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
			dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid);
			return -EINVAL;
		}
-	} else if (num_chunks != 1) {
+	} else if (is_sync_stream && num_chunks != 1) {
		dev_err(hdev->dev,
			"Sync stream CS mandates one chunk only, context %d\n",
			ctx->asid);
@@ -1584,13 +1590,14 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	bool need_soft_reset = false;
-	int rc = 0, do_ctx_switch;
+	int rc = 0, do_ctx_switch = 0;
	void __user *chunks;
	u32 num_chunks, tmp;
	u16 sob_count;
	int ret;

-	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
+	if (hdev->supports_ctx_switch)
+		do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
		mutex_lock(&hpriv->restore_phase_mutex);
@@ -1661,9 +1668,10 @@ wait_again:
			}
		}

-		ctx->thread_ctx_switch_wait_token = 1;
+		if (hdev->supports_ctx_switch)
+			ctx->thread_ctx_switch_wait_token = 1;

-	} else if (!ctx->thread_ctx_switch_wait_token) {
+	} else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) {
		rc = hl_poll_timeout_memory(hdev,
			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
			100, jiffies_to_usecs(hdev->timeout_jiffies), false);
@@ -2351,6 +2359,41 @@ out:
	return rc;
 }

+static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
+		u32 num_engine_cores, u32 core_command)
+{
+	int rc;
+	struct hl_device *hdev = hpriv->hdev;
+	void __user *engine_cores_arr;
+	u32 *cores;
+
+	if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
+		dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
+		return -EINVAL;
+	}
+
+	if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) {
+		dev_err(hdev->dev, "Engine core command is invalid\n");
+		return -EINVAL;
+	}
+
+	engine_cores_arr = (void __user *) (uintptr_t) engine_cores;
+	cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL);
+	if (!cores)
+		return -ENOMEM;
+
+	if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) {
+		dev_err(hdev->dev, "Failed to copy core-ids array from user\n");
+		kfree(cores);
+		return -EFAULT;
+	}
+
+	rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command);
+	kfree(cores);
+
+	return rc;
+}
+
 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 {
	union hl_cs_args *args = data;
@@ -2403,6 +2446,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
		rc = cs_ioctl_unreserve_signals(hpriv,
					args->in.encaps_sig_handle_id);
		break;
+	case CS_TYPE_ENGINE_CORE:
+		rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
+				args->in.num_engine_cores, args->in.core_command);
+		break;
	default:
		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
						args->in.cs_flags,
@@ -2524,7 +2571,7 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_com
	ktime_t max_ktime, first_cs_time;
	enum hl_cs_wait_status status;

-	memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr));
+	memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *));

	/* get all fences under the same lock */
	rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
@@ -2826,7 +2873,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
	}

	/* allocate array for the fences */
-	fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL);
+	fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL);
	if (!fence_arr) {
		rc = -ENOMEM;
		goto free_seq_arr;
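From userspace, the new engine-core command rides the existing CS ioctl rather than a new entry point. A hedged sketch of halting two cores; the flag, field, and command names come straight from the diff above, while the device handle and core ids are assumptions:

    #include <fcntl.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <misc/habanalabs.h>

    static int halt_two_cores(int dev_fd)
    {
        uint32_t cores[2] = { 0, 1 };  /* assumption: valid core ids */
        union hl_cs_args args;

        memset(&args, 0, sizeof(args));
        args.in.cs_flags = HL_CS_FLAGS_ENGINE_CORE_COMMAND;
        args.in.engine_cores = (uint64_t)(uintptr_t)cores;
        args.in.num_engine_cores = 2;
        args.in.core_command = HL_ENGINE_CORE_HALT;

        /* The driver copies the core-id array and forwards it to the
         * ASIC's set_engine_cores() callback, as shown above.
         */
        return ioctl(dev_fd, HL_IOCTL_CS, &args);
    }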
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
@@ -291,14 +291,16 @@ static int vm_show(struct seq_file *s, void *data)
	if (ctx->asid != HL_KERNEL_ASID_ID &&
			!list_empty(&ctx->hw_block_mem_list)) {
		seq_puts(s, "\nhw_block mappings:\n\n");
-		seq_puts(s, "    virtual address    size    HW block id\n");
-		seq_puts(s, "-------------------------------------------\n");
+		seq_puts(s,
+			"    virtual address    block size    mapped size    HW block id\n");
+		seq_puts(s,
+			"---------------------------------------------------------------\n");
		mutex_lock(&ctx->hw_block_list_lock);
-		list_for_each_entry(lnode, &ctx->hw_block_mem_list,
-				node) {
+		list_for_each_entry(lnode, &ctx->hw_block_mem_list, node) {
			seq_printf(s,
-				"    0x%-14lx %-6u %-9u\n",
-				lnode->vaddr, lnode->size, lnode->id);
+				"    0x%-14lx %-6u %-6u %-9u\n",
+				lnode->vaddr, lnode->block_size, lnode->mapped_size,
+				lnode->id);
		}
		mutex_unlock(&ctx->hw_block_list_lock);
	}
@@ -591,6 +593,7 @@ static int engines_show(struct seq_file *s, void *data)
	struct hl_debugfs_entry *entry = s->private;
	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
	struct hl_device *hdev = dev_entry->hdev;
+	struct engines_data eng_data;

	if (hdev->reset_info.in_reset) {
		dev_warn_ratelimited(hdev->dev,
@@ -598,7 +601,25 @@ static int engines_show(struct seq_file *s, void *data)
		return 0;
	}

-	hdev->asic_funcs->is_device_idle(hdev, NULL, 0, s);
+	eng_data.actual_size = 0;
+	eng_data.allocated_buf_size = HL_ENGINES_DATA_MAX_SIZE;
+	eng_data.buf = vmalloc(eng_data.allocated_buf_size);
+	if (!eng_data.buf)
+		return -ENOMEM;
+
+	hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data);
+
+	if (eng_data.actual_size > eng_data.allocated_buf_size) {
+		dev_err(hdev->dev,
+			"Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n",
+			eng_data.actual_size, eng_data.allocated_buf_size);
+		vfree(eng_data.buf);
+		return -ENOMEM;
+	}
+
+	seq_write(s, eng_data.buf, eng_data.actual_size);
+
+	vfree(eng_data.buf);
+
	return 0;
 }
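engines_show() now renders through hl_engine_data_sprintf() (added in device.c below), which sizes each string with a NULL-buffer vsnprintf() pass before writing and keeps growing actual_size even on overflow so the caller can detect truncation. The same two-pass pattern in isolation, as plain C:

    #include <stdarg.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct eng_buf {
        char *buf;
        int actual_size;            /* grows even when the write is skipped */
        unsigned int allocated_buf_size;
    };

    static void eng_sprintf(struct eng_buf *e, const char *fmt, ...)
    {
        va_list args;
        int str_size;

        va_start(args, fmt);
        str_size = vsnprintf(NULL, 0, fmt, args) + 1;  /* sizing pass */
        va_end(args);

        if ((unsigned int)(e->actual_size + str_size) < e->allocated_buf_size) {
            va_start(args, fmt);
            vsnprintf(e->buf + e->actual_size, str_size, fmt, args);
            va_end(args);
        }
        e->actual_size += str_size; /* exact size of all inputs */
    }

    int main(void)
    {
        struct eng_buf e = { malloc(64), 0, 64 };

        eng_sprintf(&e, "engine %d is %s", 3, "idle");
        printf("%s (actual_size=%d)\n", e.buf, e.actual_size);
        free(e.buf);
        return 0;
    }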
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
@@ -13,6 +13,8 @@
 #include <linux/pci.h>
 #include <linux/hwmon.h>

+#include <trace/events/habanalabs.h>
+
 #define HL_RESET_DELAY_USEC		10000	/* 10ms */

 enum dma_alloc_type {
@@ -26,8 +28,9 @@ enum dma_alloc_type {
 /*
  * hl_set_dram_bar- sets the bar to allow later access to address
  *
- * @hdev: pointer to habanalabs device structure
+ * @hdev: pointer to habanalabs device structure.
  * @addr: the address the caller wants to access.
+ * @region: the PCI region.
  *
  * @return: the old BAR base address on success, U64_MAX for failure.
  *	    The caller should set it back to the old address after use.
@@ -37,58 +40,64 @@ enum dma_alloc_type {
 * This function can be called also if the bar doesn't need to be set,
 * in that case it just won't change the base.
 */
-static uint64_t hl_set_dram_bar(struct hl_device *hdev, u64 addr)
+static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region)
 {
	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u64 bar_base_addr;
+	u64 bar_base_addr, old_base;

-	bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
+	if (is_power_of_2(prop->dram_pci_bar_size))
+		bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
+	else
+		bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) *
+				prop->dram_pci_bar_size;

-	return hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
+	old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
+
+	/* in case of success we need to update the new BAR base */
+	if (old_base != U64_MAX)
+		region->region_base = bar_base_addr;
+
+	return old_base;
 }


 static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
	enum debugfs_access_type acc_type, enum pci_region region_type)
 {
	struct pci_mem_region *region = &hdev->pci_mem_region[region_type];
+	void __iomem *acc_addr;
	u64 old_base = 0, rc;

	if (region_type == PCI_REGION_DRAM) {
-		old_base = hl_set_dram_bar(hdev, addr);
+		old_base = hl_set_dram_bar(hdev, addr, region);
		if (old_base == U64_MAX)
			return -EIO;
	}

+	acc_addr = hdev->pcie_bar[region->bar_id] + addr - region->region_base +
+			region->offset_in_bar;
	switch (acc_type) {
	case DEBUGFS_READ8:
-		*val = readb(hdev->pcie_bar[region->bar_id] +
-			addr - region->region_base + region->offset_in_bar);
+		*val = readb(acc_addr);
		break;
	case DEBUGFS_WRITE8:
-		writeb(*val, hdev->pcie_bar[region->bar_id] +
-			addr - region->region_base + region->offset_in_bar);
+		writeb(*val, acc_addr);
		break;
	case DEBUGFS_READ32:
-		*val = readl(hdev->pcie_bar[region->bar_id] +
-			addr - region->region_base + region->offset_in_bar);
+		*val = readl(acc_addr);
		break;
	case DEBUGFS_WRITE32:
-		writel(*val, hdev->pcie_bar[region->bar_id] +
-			addr - region->region_base + region->offset_in_bar);
+		writel(*val, acc_addr);
		break;
	case DEBUGFS_READ64:
-		*val = readq(hdev->pcie_bar[region->bar_id] +
-			addr - region->region_base + region->offset_in_bar);
+		*val = readq(acc_addr);
		break;
	case DEBUGFS_WRITE64:
-		writeq(*val, hdev->pcie_bar[region->bar_id] +
-			addr - region->region_base + region->offset_in_bar);
+		writeq(*val, acc_addr);
		break;
	}

	if (region_type == PCI_REGION_DRAM) {
-		rc = hl_set_dram_bar(hdev, old_base);
+		rc = hl_set_dram_bar(hdev, old_base, region);
		if (rc == U64_MAX)
			return -EIO;
	}
@@ -97,9 +106,10 @@ static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val
 }

 static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
-					gfp_t flag, enum dma_alloc_type alloc_type)
+					gfp_t flag, enum dma_alloc_type alloc_type,
+					const char *caller)
 {
-	void *ptr;
+	void *ptr = NULL;

	switch (alloc_type) {
	case DMA_ALLOC_COHERENT:
@@ -113,11 +123,16 @@ static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t
		break;
	}

+	if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr))
+		trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size,
+						caller);
+
	return ptr;
 }

 static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *cpu_addr,
-					dma_addr_t dma_handle, enum dma_alloc_type alloc_type)
+					dma_addr_t dma_handle, enum dma_alloc_type alloc_type,
+					const char *caller)
 {
	switch (alloc_type) {
	case DMA_ALLOC_COHERENT:
@@ -130,39 +145,44 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c
		hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle);
		break;
	}
+
+	trace_habanalabs_dma_free(hdev->dev, (u64) (uintptr_t) cpu_addr, dma_handle, size, caller);
 }

-void *hl_asic_dma_alloc_coherent(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
-					gfp_t flag)
+void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
+					gfp_t flag, const char *caller)
 {
-	return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT);
+	return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT, caller);
 }

-void hl_asic_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr,
-					dma_addr_t dma_handle)
+void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr,
+					dma_addr_t dma_handle, const char *caller)
 {
-	hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT);
+	hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT, caller);
 }

-void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle)
+void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size,
+						dma_addr_t *dma_handle, const char *caller)
 {
-	return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE);
+	return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller);
 }

-void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
+void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr,
+						const char *caller)
 {
-	hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE);
+	hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller);
 }

-void *hl_asic_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags,
-					dma_addr_t *dma_handle)
+void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags,
+					dma_addr_t *dma_handle, const char *caller)
 {
-	return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL);
+	return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL, caller);
 }

-void hl_asic_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
+void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
+					const char *caller)
 {
-	hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL);
+	hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL, caller);
 }

 int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
@@ -267,6 +287,30 @@ int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
	return 0;
 }

+void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...)
+{
+	va_list args;
+	int str_size;
+
+	va_start(args, fmt);
+	/* Calculate formatted string length. Assuming each string is null terminated, hence
+	 * increment result by 1
+	 */
+	str_size = vsnprintf(NULL, 0, fmt, args) + 1;
+	va_end(args);
+
+	if ((e->actual_size + str_size) < e->allocated_buf_size) {
+		va_start(args, fmt);
+		vsnprintf(e->buf + e->actual_size, str_size, fmt, args);
+		va_end(args);
+	}
+
+	/* Need to update the size even when not updating destination buffer to get the exact size
+	 * of all input strings
+	 */
+	e->actual_size += str_size;
+}
+
 enum hl_device_status hl_device_status(struct hl_device *hdev)
 {
	enum hl_device_status status;
@@ -322,6 +366,8 @@ static void hpriv_release(struct kref *ref)

	hdev = hpriv->hdev;

+	hdev->asic_funcs->send_device_activity(hdev, false);
+
	put_pid(hpriv->taskpid);

	hl_debugfs_remove_file(hpriv);
@@ -673,7 +719,7 @@ static int device_early_init(struct hl_device *hdev)

	if (hdev->asic_prop.completion_queues_count) {
		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
-				sizeof(*hdev->cq_wq),
+				sizeof(struct workqueue_struct *),
				GFP_KERNEL);
		if (!hdev->cq_wq) {
			rc = -ENOMEM;
@@ -1091,7 +1137,9 @@ int hl_device_resume(struct hl_device *hdev)
	/* 'in_reset' was set to true during suspend, now we must clear it in order
	 * for hard reset to be performed
	 */
+	spin_lock(&hdev->reset_info.lock);
	hdev->reset_info.in_reset = 0;
+	spin_unlock(&hdev->reset_info.lock);

	rc = hl_device_reset(hdev, HL_DRV_RESET_HARD);
	if (rc) {
@@ -1518,6 +1566,13 @@ kill_processes:
	 */
	hdev->disabled = false;

+	/* F/W security enabled indication might be updated after hard-reset */
+	if (hard_reset) {
+		rc = hl_fw_read_preboot_status(hdev);
+		if (rc)
+			goto out_err;
+	}
+
	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize the H/W after reset\n");
@@ -1556,7 +1611,7 @@ kill_processes:
		if (!hdev->asic_prop.fw_security_enabled)
			hl_fw_set_max_power(hdev);
	} else {
-		rc = hdev->asic_funcs->non_hard_reset_late_init(hdev);
+		rc = hdev->asic_funcs->compute_reset_late_init(hdev);
		if (rc) {
			if (reset_upon_device_release)
				dev_err(hdev->dev,
@@ -1704,7 +1759,9 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
	char *name;
	bool add_cdev_sysfs_on_err = false;

-	name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
+	hdev->cdev_idx = hdev->id / 2;
+
+	name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx);
	if (!name) {
		rc = -ENOMEM;
		goto out_disabled;
@@ -1719,7 +1776,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
	if (rc)
		goto out_disabled;

-	name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
+	name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx);
	if (!name) {
		rc = -ENOMEM;
		goto free_dev;
@@ -1806,7 +1863,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
	}

	hdev->shadow_cs_queue = kcalloc(hdev->asic_prop.max_pending_cs,
-					sizeof(*hdev->shadow_cs_queue), GFP_KERNEL);
+					sizeof(struct hl_cs *), GFP_KERNEL);
	if (!hdev->shadow_cs_queue) {
		rc = -ENOMEM;
		goto cq_fini;
@@ -1997,10 +2054,10 @@ out_disabled:
	if (hdev->pdev)
		dev_err(&hdev->pdev->dev,
			"Failed to initialize hl%d. Device is NOT usable !\n",
-			hdev->id / 2);
+			hdev->cdev_idx);
	else
		pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
-			hdev->id / 2);
+			hdev->cdev_idx);

	return rc;
 }
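hl_set_dram_bar() now supports DRAM BARs whose size is not a power of two: the old mask trick only rounds correctly for power-of-two sizes, so the new code falls back to DIV_ROUND_DOWN_ULL(). A standalone comparison of the two paths, with the 48 GiB size being an illustrative assumption:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t bar_base(uint64_t addr, uint64_t bar_size)
    {
        /* Power-of-two sizes can use the cheap mask... */
        if (bar_size && (bar_size & (bar_size - 1)) == 0)
            return addr & ~(bar_size - 1);
        /* ...otherwise round down by division, as DIV_ROUND_DOWN_ULL does. */
        return (addr / bar_size) * bar_size;
    }

    int main(void)
    {
        uint64_t addr = 0x900000000ULL;  /* arbitrary DRAM address */

        printf("32 GiB BAR base: %#llx\n",
               (unsigned long long)bar_base(addr, 32ULL << 30));
        printf("48 GiB BAR base: %#llx\n",
               (unsigned long long)bar_base(addr, 48ULL << 30));
        return 0;
    }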
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
@@ -15,14 +15,6 @@

 #define FW_FILE_MAX_SIZE	0x1400000 /* maximum size of 20MB */

-struct fw_binning_conf {
-	u64 tpc_binning;
-	u32 dec_binning;
-	u32 hbm_binning;
-	u32 edma_binning;
-	u32 mme_redundancy;
-};
-
 static char *extract_fw_ver_from_str(const char *fw_str)
 {
	char *str, *fw_ver, *whitespace;
@@ -260,7 +252,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
	struct cpucp_packet *pkt;
	dma_addr_t pkt_dma_addr;
	struct hl_bd *sent_bd;
-	u32 tmp, expected_ack_val, pi;
+	u32 tmp, expected_ack_val, pi, opcode;
	int rc;

	pkt = hl_cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr);
@@ -327,8 +319,35 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,

	rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
	if (rc) {
-		dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
-			rc, (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT);
+		opcode = (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT;
+
+		if (!prop->supports_advanced_cpucp_rc) {
+			dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", rc, opcode);
+			goto scrub_descriptor;
+		}
+
+		switch (rc) {
+		case cpucp_packet_invalid:
+			dev_err(hdev->dev,
+				"CPU packet %d is not supported by F/W\n", opcode);
+			break;
+		case cpucp_packet_fault:
+			dev_err(hdev->dev,
+				"F/W failed processing CPU packet %d\n", opcode);
+			break;
+		case cpucp_packet_invalid_pkt:
+			dev_dbg(hdev->dev,
+				"CPU packet %d is not supported by F/W\n", opcode);
+			break;
+		case cpucp_packet_invalid_params:
+			dev_err(hdev->dev,
+				"F/W reports invalid parameters for CPU packet %d\n", opcode);
+			break;
+
+		default:
+			dev_err(hdev->dev,
+				"Unknown F/W ERROR %d for CPU packet %d\n", rc, opcode);
+		}

		/* propagate the return code from the f/w to the callers who want to check it */
		if (result)
@@ -340,6 +359,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
		*result = le64_to_cpu(pkt->result);
	}

+scrub_descriptor:
	/* Scrub previous buffer descriptor 'ctl' field which contains the
	 * previous PI value written during packet submission.
	 * We must do this or else F/W can read an old value upon queue wraparound.
@@ -462,6 +482,21 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
			size);
 }

+int hl_fw_send_device_activity(struct hl_device *hdev, bool open)
+{
+	struct cpucp_packet pkt;
+	int rc;
+
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_ACTIVE_STATUS_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.value = cpu_to_le64(open);
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
+	if (rc)
+		dev_err(hdev->dev, "failed to send device activity msg(%u)\n", open);
+
+	return rc;
+}
+
 int hl_fw_send_heartbeat(struct hl_device *hdev)
 {
	struct cpucp_packet hb_pkt;
@@ -581,6 +616,15 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
	dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val);

+	/* All warnings should go here in order not to reach the unknown error validation */
+	if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) {
+		dev_warn(hdev->dev,
+			"Device boot warning - EEPROM failure detected, default settings applied\n");
+		/* This is a warning so we don't want it to disable the
+		 * device
+		 */
+		err_val &= ~CPU_BOOT_ERR0_EEPROM_FAIL;
+	}
+
	if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
		dev_warn(hdev->dev,
			"Device boot warning - Skipped DRAM initialization\n");
@@ -1476,6 +1520,8 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
	 */
	prop->hard_reset_done_by_fw = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);

+	prop->fw_security_enabled = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_SECURITY_EN);
+
	dev_dbg(hdev->dev, "Firmware preboot boot device status0 %#x\n",
			cpu_boot_dev_sts0);

@@ -1514,7 +1560,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev)
	hdev->asic_funcs->init_firmware_preload_params(hdev);

	/*
-	 * In order to determine boot method (static VS dymanic) we need to
+	 * In order to determine boot method (static VS dynamic) we need to
	 * read the boot caps register
	 */
	rc = hl_fw_read_preboot_caps(hdev);
@@ -1781,7 +1827,7 @@ int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
 *
 * @return the CRC32 result
 *
- * NOTE: kernel's CRC32 differ's from standard CRC32 calculation.
+ * NOTE: kernel's CRC32 differs from standard CRC32 calculation.
 *       in order to be aligned we need to flip the bits of both the input
 *       initial CRC and kernel's CRC32 result.
 *       in addition both sides use initial CRC of 0,
@@ -1798,7 +1844,7 @@ static u32 hl_fw_compat_crc32(u8 *data, size_t size)
 *
 * @hdev: pointer to the habanalabs device structure
 * @addr: device address of memory transfer
- * @size: memory transter size
+ * @size: memory transfer size
 * @region: PCI memory region
 *
 * @return 0 on success, otherwise non-zero error code
@@ -1854,50 +1900,36 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev,
	u64 addr;
	int rc;

-	if (le32_to_cpu(fw_desc->header.magic) != HL_COMMS_DESC_MAGIC) {
-		dev_err(hdev->dev, "Invalid magic for dynamic FW descriptor (%x)\n",
+	if (le32_to_cpu(fw_desc->header.magic) != HL_COMMS_DESC_MAGIC)
+		dev_warn(hdev->dev, "Invalid magic for dynamic FW descriptor (%x)\n",
			fw_desc->header.magic);
-		return -EIO;
-	}

-	if (fw_desc->header.version != HL_COMMS_DESC_VER) {
-		dev_err(hdev->dev, "Invalid version for dynamic FW descriptor (%x)\n",
+	if (fw_desc->header.version != HL_COMMS_DESC_VER)
+		dev_warn(hdev->dev, "Invalid version for dynamic FW descriptor (%x)\n",
			fw_desc->header.version);
-		return -EIO;
-	}

	/*
-	 * calc CRC32 of data without header.
+	 * Calc CRC32 of data without header. use the size of the descriptor
+	 * reported by firmware, without calculating it ourself, to allow adding
+	 * more fields to the lkd_fw_comms_desc structure.
	 * note that no alignment/stride address issues here as all structures
-	 * are 64 bit padded
+	 * are 64 bit padded.
	 */
-	data_size = sizeof(struct lkd_fw_comms_desc) -
-					sizeof(struct comms_desc_header);
	data_ptr = (u8 *)fw_desc + sizeof(struct comms_desc_header);
-
-	if (le16_to_cpu(fw_desc->header.size) != data_size) {
-		dev_err(hdev->dev,
-			"Invalid descriptor size 0x%x, expected size 0x%zx\n",
-			le16_to_cpu(fw_desc->header.size), data_size);
-		return -EIO;
-	}
+	data_size = le16_to_cpu(fw_desc->header.size);

	data_crc32 = hl_fw_compat_crc32(data_ptr, data_size);
-
	if (data_crc32 != le32_to_cpu(fw_desc->header.crc32)) {
-		dev_err(hdev->dev,
-			"CRC32 mismatch for dynamic FW descriptor (%x:%x)\n",
-			data_crc32, fw_desc->header.crc32);
+		dev_err(hdev->dev, "CRC32 mismatch for dynamic FW descriptor (%x:%x)\n",
+			data_crc32, fw_desc->header.crc32);
		return -EIO;
	}

	/* find memory region to which to copy the image */
	addr = le64_to_cpu(fw_desc->img_addr);
	region_id = hl_get_pci_memory_region(hdev, addr);
-	if ((region_id != PCI_REGION_SRAM) &&
-			((region_id != PCI_REGION_DRAM))) {
-		dev_err(hdev->dev,
-			"Invalid region to copy FW image address=%llx\n", addr);
+	if ((region_id != PCI_REGION_SRAM) && ((region_id != PCI_REGION_DRAM))) {
+		dev_err(hdev->dev, "Invalid region to copy FW image address=%llx\n", addr);
		return -EIO;
	}

@@ -1914,8 +1946,7 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev,
					fw_loader->dynamic_loader.fw_image_size,
					region);
	if (rc) {
-		dev_err(hdev->dev,
-			"invalid mem transfer request for FW image\n");
+		dev_err(hdev->dev, "invalid mem transfer request for FW image\n");
		return rc;
	}

@@ -2422,18 +2453,6 @@ static int hl_fw_dynamic_send_msg(struct hl_device *hdev,
		msg.reset_cause = *(__u8 *) data;
		break;

-	case HL_COMMS_BINNING_CONF_TYPE:
-	{
-		struct fw_binning_conf *binning_conf = (struct fw_binning_conf *) data;
-
-		msg.tpc_binning_conf = cpu_to_le64(binning_conf->tpc_binning);
-		msg.dec_binning_conf = cpu_to_le32(binning_conf->dec_binning);
-		msg.hbm_binning_conf = cpu_to_le32(binning_conf->hbm_binning);
-		msg.edma_binning_conf = cpu_to_le32(binning_conf->edma_binning);
-		msg.mme_redundancy_conf = cpu_to_le32(binning_conf->mme_redundancy);
-		break;
-	}
-
	default:
		dev_err(hdev->dev,
			"Send COMMS message - invalid message type %u\n",
@@ -2503,13 +2522,6 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
	 */
	dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;

-	/* if no preboot loaded indication- wait for preboot */
-	if (!(hdev->fw_loader.fw_comp_loaded & FW_TYPE_PREBOOT_CPU)) {
-		rc = hl_fw_wait_preboot_ready(hdev);
-		if (rc)
-			return -EIO;
-	}
-
	rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE,
						0, true,
						fw_loader->cpu_timeout);
@@ -2547,7 +2559,7 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
	/*
	 * when testing FW load (without Linux) on PLDM we don't want to
	 * wait until boot fit is active as it may take several hours.
-	 * instead, we load the bootfit and let it do all initializations in
+	 * instead, we load the bootfit and let it do all initialization in
	 * the background.
	 */
	if (hdev->pldm && !(hdev->fw_components & FW_TYPE_LINUX))
@@ -2961,3 +2973,49 @@ void hl_fw_set_max_power(struct hl_device *hdev)
	if (rc)
		dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
 }
+
+static int hl_fw_get_sec_attest_data(struct hl_device *hdev, u32 packet_id, void *data, u32 size,
+					u32 nonce, u32 timeout)
+{
+	struct cpucp_packet pkt = {};
+	dma_addr_t req_dma_addr;
+	void *req_cpu_addr;
+	int rc;
+
+	req_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, size, &req_dma_addr);
+	if (!req_cpu_addr) {
+		dev_err(hdev->dev,
+			"Failed to allocate DMA memory for CPU-CP packet %u\n", packet_id);
+		return -ENOMEM;
+	}
+
+	memset(data, 0, size);
+
+	pkt.ctl = cpu_to_le32(packet_id << CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.addr = cpu_to_le64(req_dma_addr);
+	pkt.data_max_size = cpu_to_le32(size);
+	pkt.nonce = cpu_to_le32(nonce);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+						timeout, NULL);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc);
+		goto out;
+	}
+
+	memcpy(data, req_cpu_addr, size);
+
+out:
+	hl_cpu_accessible_dma_pool_free(hdev, size, req_cpu_addr);
+
+	return rc;
+}
+
+int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info,
+				u32 nonce)
+{
+	return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_SEC_ATTEST_GET, sec_attest_info,
+					sizeof(struct cpucp_sec_attest_info), nonce,
+					HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
+}
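On the userspace side, the attestation blob added above is fetched through the INFO ioctl with a caller-chosen nonce that the firmware folds into the signed quote. A heavily hedged sketch; HL_INFO_SEC_ATTEST and the sec_attest_nonce field are this cycle's additions, so verify the names against include/uapi/misc/habanalabs.h:

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <misc/habanalabs.h>

    static int get_attestation(int dev_fd, void *out, uint32_t out_size,
                               uint32_t nonce)
    {
        struct hl_info_args args;

        memset(&args, 0, sizeof(args));
        args.op = HL_INFO_SEC_ATTEST;                  /* assumption: opcode name */
        args.return_pointer = (uint64_t)(uintptr_t)out;
        args.return_size = out_size;
        args.sec_attest_nonce = nonce;                 /* assumption: field name */

        return ioctl(dev_fd, HL_IOCTL_INFO, &args);
    }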
@ -66,6 +66,7 @@ struct hl_fpriv;
|
||||
#define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
|
||||
#define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
|
||||
#define HL_CPUCP_MON_DUMP_TIMEOUT_USEC 10000000 /* 10s */
|
||||
#define HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC 10000000 /* 10s */
|
||||
|
||||
#define HL_FW_STATUS_POLL_INTERVAL_USEC 10000 /* 10ms */
|
||||
#define HL_FW_COMMS_STATUS_PLDM_POLL_INTERVAL_USEC 1000000 /* 1s */
|
||||
@ -94,7 +95,7 @@ struct hl_fpriv;
|
||||
#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
|
||||
|
||||
/**
|
||||
* enum hl_mmu_page_table_locaion - mmu page table location
|
||||
* enum hl_mmu_page_table_location - mmu page table location
|
||||
* @MMU_DR_PGT: page-table is located on device DRAM.
|
||||
* @MMU_HR_PGT: page-table is located on host memory.
|
||||
* @MMU_NUM_PGT_LOCATIONS: number of page-table locations currently supported.
|
||||
@ -143,6 +144,25 @@ enum hl_mmu_enablement {
|
||||
|
||||
#define HL_MAX_DCORES 8
|
||||
|
||||
/* DMA alloc/free wrappers */
|
||||
#define hl_asic_dma_alloc_coherent(hdev, size, dma_handle, flags) \
|
||||
hl_asic_dma_alloc_coherent_caller(hdev, size, dma_handle, flags, __func__)
|
||||
|
||||
#define hl_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle) \
|
||||
hl_cpu_accessible_dma_pool_alloc_caller(hdev, size, dma_handle, __func__)
|
||||
|
||||
#define hl_asic_dma_pool_zalloc(hdev, size, mem_flags, dma_handle) \
|
||||
hl_asic_dma_pool_zalloc_caller(hdev, size, mem_flags, dma_handle, __func__)
|
||||
|
||||
#define hl_asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle) \
|
||||
hl_asic_dma_free_coherent_caller(hdev, size, cpu_addr, dma_handle, __func__)
|
||||
|
||||
#define hl_cpu_accessible_dma_pool_free(hdev, size, vaddr) \
|
||||
hl_cpu_accessible_dma_pool_free_caller(hdev, size, vaddr, __func__)
|
||||
|
||||
#define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \
|
||||
hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__)
|
||||
|
||||
/*
|
||||
* Reset Flags
|
||||
*
|
||||
@ -208,6 +228,7 @@ enum hl_protection_levels {
|
||||
* struct iterate_module_ctx - HW module iterator
|
||||
* @fn: function to apply to each HW module instance
|
||||
* @data: optional internal data to the function iterator
|
||||
* @rc: return code for optional use of iterator/iterator-caller
|
||||
*/
|
||||
struct iterate_module_ctx {
|
||||
/*
|
||||
@ -217,10 +238,12 @@ struct iterate_module_ctx {
|
||||
* @inst: HW module instance within the block
|
||||
* @offset: current HW module instance offset from the 1-st HW module instance
|
||||
* in the 1-st block
|
||||
* @data: function specific data
|
||||
* @ctx: the iterator context.
|
||||
*/
|
||||
void (*fn)(struct hl_device *hdev, int block, int inst, u32 offset, void *data);
|
||||
void (*fn)(struct hl_device *hdev, int block, int inst, u32 offset,
|
||||
struct iterate_module_ctx *ctx);
|
||||
void *data;
|
||||
int rc;
|
||||
};
|
||||
|
||||
struct hl_block_glbl_sec {
|
||||
@ -342,7 +365,8 @@ enum hl_cs_type {
|
||||
CS_TYPE_WAIT,
|
||||
CS_TYPE_COLLECTIVE_WAIT,
|
||||
CS_RESERVE_SIGNALS,
|
||||
CS_UNRESERVE_SIGNALS
|
||||
CS_UNRESERVE_SIGNALS,
|
||||
CS_TYPE_ENGINE_CORE
|
||||
};
|
||||
|
||||
/*
|
||||
@ -544,10 +568,6 @@ struct hl_hints_range {
|
||||
* @tpc_binning_mask: which TPCs are binned. 0 means usable and 1 means binned.
|
||||
* @dram_enabled_mask: which DRAMs are enabled.
|
||||
* @dram_binning_mask: which DRAMs are binned. 0 means usable, 1 means binned.
|
||||
* @cb_va_start_addr: virtual start address of command buffers which are mapped
|
||||
* to the device's MMU.
|
||||
* @cb_va_end_addr: virtual end address of command buffers which are mapped to
|
||||
* the device's MMU.
|
||||
* @dram_hints_align_mask: dram va hint addresses alignment mask which is used
|
||||
* for hints validity check.
|
||||
* @cfg_base_address: config space base address.
|
||||
@ -614,6 +634,7 @@ struct hl_hints_range {
|
||||
* which the property supports_user_set_page_size is true
|
||||
* (i.e. the DRAM supports multiple page sizes), otherwise
|
||||
* it will shall be equal to dram_page_size.
|
||||
* @num_engine_cores: number of engine cpu cores
|
||||
* @collective_first_sob: first sync object available for collective use
|
||||
* @collective_first_mon: first monitor available for collective use
|
||||
* @sync_stream_first_sob: first sync object available for sync stream use
|
||||
@ -658,6 +679,7 @@ struct hl_hints_range {
|
||||
* @set_max_power_on_device_init: true if need to set max power in F/W on device init.
|
||||
* @supports_user_set_page_size: true if user can set the allocation page size.
|
||||
* @dma_mask: the dma mask to be set for this device
|
||||
* @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
|
||||
*/
|
||||
struct asic_fixed_properties {
|
||||
struct hw_queue_properties *hw_queues_props;
|
||||
@ -689,8 +711,6 @@ struct asic_fixed_properties {
|
||||
u64 tpc_binning_mask;
|
||||
u64 dram_enabled_mask;
|
||||
u64 dram_binning_mask;
|
||||
u64 cb_va_start_addr;
|
||||
u64 cb_va_end_addr;
|
||||
u64 dram_hints_align_mask;
|
||||
u64 cfg_base_address;
|
||||
u64 mmu_cache_mng_addr;
|
||||
@ -734,6 +754,7 @@ struct asic_fixed_properties {
|
||||
u32 faulty_dram_cluster_map;
|
||||
u32 xbar_edge_enabled_mask;
|
||||
u32 device_mem_alloc_default_page_size;
|
||||
u32 num_engine_cores;
|
||||
u16 collective_first_sob;
|
||||
u16 collective_first_mon;
|
||||
u16 sync_stream_first_sob;
|
||||
@ -766,6 +787,7 @@ struct asic_fixed_properties {
|
||||
u8 set_max_power_on_device_init;
|
||||
u8 supports_user_set_page_size;
|
||||
u8 dma_mask;
|
||||
u8 supports_advanced_cpucp_rc;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -797,7 +819,7 @@ struct hl_fence {
|
||||
* @lock: spinlock to protect fence.
|
||||
* @hdev: habanalabs device structure.
|
||||
* @hw_sob: the H/W SOB used in this signal/wait CS.
|
||||
* @encaps_sig_hdl: encaps signals hanlder.
|
||||
* @encaps_sig_hdl: encaps signals handler.
|
||||
* @cs_seq: command submission sequence number.
|
||||
* @type: type of the CS - signal/wait.
|
||||
* @sob_val: the SOB value that is used in this signal/wait CS.
|
||||
@ -898,14 +920,14 @@ struct hl_mmap_mem_buf {
|
||||
* @buf: back pointer to the parent mappable memory buffer
|
||||
* @debugfs_list: node in debugfs list of command buffers.
|
||||
* @pool_list: node in pool list of command buffers.
|
||||
* @va_block_list: list of virtual addresses blocks of the CB if it is mapped to
|
||||
* the device's MMU.
|
||||
* @kernel_address: Holds the CB's kernel virtual address.
|
||||
* @virtual_addr: Holds the CB's virtual address.
|
||||
* @bus_address: Holds the CB's DMA address.
|
||||
* @size: holds the CB's size.
|
||||
* @roundup_size: holds the cb size after roundup to page size.
|
||||
* @cs_cnt: holds number of CS that this CB participates in.
|
||||
* @is_pool: true if CB was acquired from the pool, false otherwise.
|
||||
* @is_internal: internaly allocated
|
||||
* @is_internal: internally allocated
|
||||
* @is_mmu_mapped: true if the CB is mapped to the device's MMU.
|
||||
*/
|
||||
struct hl_cb {
|
||||
@ -914,10 +936,11 @@ struct hl_cb {
|
||||
struct hl_mmap_mem_buf *buf;
|
||||
struct list_head debugfs_list;
|
||||
struct list_head pool_list;
|
||||
struct list_head va_block_list;
|
||||
void *kernel_address;
|
||||
u64 virtual_addr;
|
||||
dma_addr_t bus_address;
|
||||
u32 size;
|
||||
u32 roundup_size;
|
||||
atomic_t cs_cnt;
|
||||
u8 is_pool;
|
||||
u8 is_internal;
|
||||
@ -1113,7 +1136,7 @@ struct timestamp_reg_info {
|
||||
* @fence: hl fence object for interrupt completion
|
||||
* @cq_target_value: CQ target value
|
||||
* @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt
|
||||
* handler for taget value comparison
|
||||
* handler for target value comparison
|
||||
*/
|
||||
struct hl_user_pending_interrupt {
|
||||
struct timestamp_reg_info ts_reg_info;
|
||||
@ -1371,6 +1394,18 @@ struct fw_load_mgr {
|
||||
|
||||
struct hl_cs;
|
||||
|
||||
/**
|
||||
* struct engines_data - asic engines data
|
||||
* @buf: buffer for engines data in ascii
|
||||
* @actual_size: actual size of data that was written by the driver to the allocated buffer
|
||||
* @allocated_buf_size: total size of allocated buffer
|
||||
*/
|
||||
struct engines_data {
|
||||
char *buf;
|
||||
int actual_size;
|
||||
u32 allocated_buf_size;
|
||||
};

/**
 * struct hl_asic_funcs - ASIC specific functions that are can be called from
 *                        common code.
@@ -1434,11 +1469,9 @@ struct hl_cs;
 * @send_heartbeat: send is-alive packet to CPU-CP and verify response.
 * @debug_coresight: perform certain actions on Coresight for debugging.
 * @is_device_idle: return true if device is idle, false otherwise.
- * @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset
+ * @compute_reset_late_init: perform certain actions needed after a compute reset
 * @hw_queues_lock: acquire H/W queues lock.
 * @hw_queues_unlock: release H/W queues lock.
 * @kdma_lock: acquire H/W queues lock. Relevant from GRECO ASIC
 * @kdma_unlock: release H/W queues lock. Relevant from GRECO ASIC
 * @get_pci_id: retrieve PCI ID.
 * @get_eeprom_data: retrieve EEPROM data from F/W.
 * @get_monitor_dump: retrieve monitor registers dump from F/W.
@@ -1498,6 +1531,8 @@ struct hl_cs;
 * @check_if_razwi_happened: check if there was a razwi due to RR violation.
 * @access_dev_mem: access device memory
 * @set_dram_bar_base: set the base of the DRAM BAR
+ * @set_engine_cores: set a config command to enigne cores
+ * @send_device_activity: indication to FW about device availability
 */
struct hl_asic_funcs {
        int (*early_init)(struct hl_device *hdev);
@@ -1570,13 +1605,11 @@ struct hl_asic_funcs {
        int (*mmu_prefetch_cache_range)(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size);
        int (*send_heartbeat)(struct hl_device *hdev);
        int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
-        bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
-                        u8 mask_len, struct seq_file *s);
-        int (*non_hard_reset_late_init)(struct hl_device *hdev);
+        bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+                        struct engines_data *e);
+        int (*compute_reset_late_init)(struct hl_device *hdev);
        void (*hw_queues_lock)(struct hl_device *hdev);
        void (*hw_queues_unlock)(struct hl_device *hdev);
        void (*kdma_lock)(struct hl_device *hdev, int dcore_id);
        void (*kdma_unlock)(struct hl_device *hdev, int dcore_id);
        u32 (*get_pci_id)(struct hl_device *hdev);
        int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size);
        int (*get_monitor_dump)(struct hl_device *hdev, void *data);
@@ -1634,6 +1667,9 @@ struct hl_asic_funcs {
        int (*access_dev_mem)(struct hl_device *hdev, enum pci_region region_type,
                        u64 addr, u64 *val, enum debugfs_access_type acc_type);
        u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
+        int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids,
+                        u32 num_cores, u32 core_command);
+        int (*send_device_activity)(struct hl_device *hdev, bool open);
};


@@ -1727,10 +1763,10 @@ struct hl_cs_outcome {

/**
 * struct hl_cs_outcome_store - represents a limited store of completed CS outcomes
- * @outcome_map: index of completed CS searcheable by sequence number
+ * @outcome_map: index of completed CS searchable by sequence number
 * @used_list: list of outcome objects currently in use
 * @free_list: list of outcome objects currently not in use
- * @nodes_pool: a static pool of preallocated outcome objects
+ * @nodes_pool: a static pool of pre-allocated outcome objects
 * @db_lock: any operation on the store must take this lock
 */
struct hl_cs_outcome_store {
@@ -1754,12 +1790,10 @@ struct hl_cs_outcome_store {
 * @refcount: reference counter for the context. Context is released only when
 *            this hits 0l. It is incremented on CS and CS_WAIT.
 * @cs_pending: array of hl fence objects representing pending CS.
- * @outcome_store: storage data structure used to remember ouitcomes of completed
+ * @outcome_store: storage data structure used to remember outcomes of completed
 *                 command submissions for a long time after CS id wraparound.
 * @va_range: holds available virtual addresses for host and dram mappings.
 * @mem_hash_lock: protects the mem_hash.
- * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
- *            MMU hash or walking the PGT requires talking this lock.
 * @hw_block_list_lock: protects the HW block memory list.
 * @debugfs_list: node in debugfs list of contexts.
 * @hw_block_mem_list: list of HW block virtual mapped addresses.
@@ -1767,6 +1801,7 @@ struct hl_cs_outcome_store {
 * @cb_va_pool: device VA pool for command buffers which are mapped to the
 *              device's MMU.
 * @sig_mgr: encaps signals handle manager.
+ * @cb_va_pool_base: the base address for the device VA pool
 * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
 *               to user so user could inquire about CS. It is used as
 *               index to cs_pending array.
@@ -1795,13 +1830,13 @@ struct hl_ctx {
        struct hl_cs_outcome_store outcome_store;
        struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX];
        struct mutex mem_hash_lock;
-        struct mutex mmu_lock;
        struct mutex hw_block_list_lock;
        struct list_head debugfs_list;
        struct list_head hw_block_mem_list;
        struct hl_cs_counters_atomic cs_counters;
        struct gen_pool *cb_va_pool;
        struct hl_encaps_signals_mgr sig_mgr;
+        u64 cb_va_pool_base;
        u64 cs_sequence;
        u64 *dram_default_hops;
        spinlock_t cs_lock;
@@ -1823,7 +1858,6 @@ struct hl_ctx_mgr {
};


/*
 * COMMAND SUBMISSIONS
 */
@@ -1889,7 +1923,7 @@ struct hl_userptr {
 * @tdr_active: true if TDR was activated for this CS (to prevent
 *              double TDR activation).
 * @aborted: true if CS was aborted due to some device error.
- * @timestamp: true if a timestmap must be captured upon completion.
+ * @timestamp: true if a timestamp must be captured upon completion.
 * @staged_last: true if this is the last staged CS and needs completion.
 * @staged_first: true if this is the first staged CS and we need to receive
 *                timeout for this CS.
@@ -2047,14 +2081,16 @@ struct hl_vm_hash_node {
 * @node: node to hang on the list in context object.
 * @ctx: the context this node belongs to.
 * @vaddr: virtual address of the HW block.
- * @size: size of the block.
+ * @block_size: size of the block.
+ * @mapped_size: size of the block which is mapped. May change if partial un-mappings are done.
 * @id: HW block id (handle).
 */
struct hl_vm_hw_block_list_node {
        struct list_head node;
        struct hl_ctx *ctx;
        unsigned long vaddr;
-        u32 size;
+        u32 block_size;
+        u32 mapped_size;
        u32 id;
};
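
The block_size/mapped_size split exists so partial munmap() calls can be accounted: each VMA close subtracts the unmapped length, and the node is only unlinked and freed once nothing remains mapped (see hw_block_vm_close() in the memory.c hunks further down). A worked sketch of that bookkeeping, with invented sizes:

/* Sketch: a 16 KiB HW block mapped once, then unmapped in two halves. */
lnode->block_size  = SZ_16K;
lnode->mapped_size = lnode->block_size;

/* munmap() of the first half: vm_end - vm_start == SZ_8K */
lnode->mapped_size -= SZ_8K;   /* 8 KiB still mapped - keep the node */

/* munmap() of the rest drops mapped_size to 0 - unlink and free */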

@@ -2214,7 +2250,7 @@ struct hl_info_list {

/**
 * struct hl_debugfs_entry - debugfs dentry wrapper.
- * @info_ent: dentry realted ops.
+ * @info_ent: dentry related ops.
 * @dev_entry: ASIC specific debugfs manager.
 */
struct hl_debugfs_entry {
@@ -2492,7 +2528,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
                        break; \
                (val) = __elbi_read; \
        } else {\
-                (val) = RREG32((u32)addr); \
+                (val) = RREG32((u32)(addr)); \
        } \
        if (cond) \
                break; \
@@ -2503,7 +2539,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
                        break; \
                (val) = __elbi_read; \
        } else {\
-                (val) = RREG32((u32)addr); \
+                (val) = RREG32((u32)(addr)); \
        } \
                break; \
        } \
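
The extra parentheses matter because the macro argument is substituted textually: with an expression like base + off, the old body expanded to RREG32((u32)base + off), applying the cast to base alone; (addr) makes the cast cover the whole expression. A minimal sketch of how such a poll macro is typically invoked — the register and mask names are invented, only hl_poll_timeout() itself is the macro being patched here:

/* Poll a status register until a READY bit is set, or time out. */
u32 sts;
int rc;

rc = hl_poll_timeout(hdev, mmSTS_REG + reg_off, sts, (sts & STS_READY_MASK),
                        1000 /* sleep [us] */, 1000000 /* timeout [us] */);
if (rc)
        dev_err(hdev->dev, "Timeout while waiting for READY\n");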

@@ -2919,7 +2955,7 @@ struct razwi_info {
 * struct undefined_opcode_info - info about last undefined opcode error
 * @timestamp: timestamp of the undefined opcode error
 * @cb_addr_streams: CB addresses (per stream) that are currently exists in the PQ
- *                  entiers. In case all streams array entries are
+ *                  entries. In case all streams array entries are
 *                   filled with values, it means the execution was in Lower-CP.
 * @cq_addr: the address of the current handled command buffer
 * @cq_size: the size of the current handled command buffer
@@ -2946,12 +2982,12 @@ struct undefined_opcode_info {
};

/**
- * struct last_error_session_info - info about last session errors occurred.
- * @cs_timeout: CS timeout error last information.
- * @razwi: razwi last information.
+ * struct hl_error_info - holds information collected during an error.
+ * @cs_timeout: CS timeout error information.
+ * @razwi: razwi information.
 * @undef_opcode: undefined opcode information
 */
-struct last_error_session_info {
+struct hl_error_info {
        struct cs_timeout_info cs_timeout;
        struct razwi_info razwi;
        struct undefined_opcode_info undef_opcode;
@@ -2960,7 +2996,7 @@ struct last_error_session_info {
/**
 * struct hl_reset_info - holds current device reset information.
 * @lock: lock to protect critical reset flows.
- * @compute_reset_cnt: number of compte resets since the driver was loaded.
+ * @compute_reset_cnt: number of compute resets since the driver was loaded.
 * @hard_reset_cnt: number of hard resets since the driver was loaded.
 * @hard_reset_schedule_flags: hard reset is scheduled to after current compute reset,
 *                             here we hold the hard reset flags.
@@ -2971,7 +3007,7 @@ struct last_error_session_info {
 * @hard_reset_pending: is there a hard reset work pending.
 * @curr_reset_cause: saves an enumerated reset cause when a hard reset is
 *                    triggered, and cleared after it is shared with preboot.
- * @prev_reset_trigger: saves the previous trigger which caused a reset, overidden
+ * @prev_reset_trigger: saves the previous trigger which caused a reset, overridden
 *                      with a new value on next reset
 * @reset_trigger_repeated: set if device reset is triggered more than once with
 *                          same cause.
@@ -3041,6 +3077,12 @@ struct hl_reset_info {
 * @asid_mutex: protects asid_bitmap.
 * @send_cpu_message_lock: enforces only one message in Host <-> CPU-CP queue.
 * @debug_lock: protects critical section of setting debug mode for device
+ * @mmu_lock: protects the MMU page tables and invalidation h/w. Although the
+ *            page tables are per context, the invalidation h/w is per MMU.
+ *            Therefore, we can't allow multiple contexts (we only have two,
+ *            user and kernel) to access the invalidation h/w at the same time.
+ *            In addition, any change to the PGT, modifying the MMU hash or
+ *            walking the PGT requires talking this lock.
 * @asic_prop: ASIC specific immutable properties.
 * @asic_funcs: ASIC specific functions.
 * @asic_specific: ASIC specific information to use only from ASIC files.
@@ -3049,7 +3091,7 @@ struct hl_reset_info {
 * @hl_chip_info: ASIC's sensors information.
 * @device_status_description: device status description.
 * @hl_debugfs: device's debugfs manager.
- * @cb_pool: list of preallocated CBs.
+ * @cb_pool: list of pre allocated CBs.
 * @cb_pool_lock: protects the CB pool.
 * @internal_cb_pool_virt_addr: internal command buffer pool virtual address.
 * @internal_cb_pool_dma_addr: internal command buffer pool dma address.
@@ -3070,7 +3112,7 @@ struct hl_reset_info {
 * @state_dump_specs: constants and dictionaries needed to dump system state.
 * @multi_cs_completion: array of multi-CS completion.
 * @clk_throttling: holds information about current/previous clock throttling events
- * @last_error: holds information about last session in which CS timeout or razwi error occurred.
+ * @captured_err_info: holds information about errors.
 * @reset_info: holds current device reset information.
 * @stream_master_qid_arr: pointer to array with QIDs of master streams.
 * @fw_major_version: major version of current loaded preboot.
@@ -3111,7 +3153,8 @@ struct hl_reset_info {
 * @edma_binning: contains mask of edma engines that is received from the f/w which
 *                indicates which edma engines are binned-out
 * @id: device minor.
- * @id_control: minor of the control device
+ * @id_control: minor of the control device.
+ * @cdev_idx: char device index. Used for setting its name.
 * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
 *                    addresses.
 * @is_in_dram_scrub: true if dram scrub operation is on going.
@@ -3165,6 +3208,7 @@ struct hl_reset_info {
 *             Used only for testing.
 * @heartbeat: Controls if we want to enable the heartbeat mechanism vs. the f/w, which verifies
 *             that the f/w is always alive. Used only for testing.
+ * @supports_ctx_switch: true if a ctx switch is required upon first submission.
 */
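
The new device-level @mmu_lock replaces the per-context lock removed from struct hl_ctx above: the invalidation hardware is shared by the user and kernel contexts, so serialization has to happen per device. A minimal sketch of the locking pattern the later memory.c hunks adopt — map, then invalidate the range, all under hdev->mmu_lock (map_my_range() is a made-up wrapper; the callees appear verbatim in this diff):

static int map_my_range(struct hl_ctx *ctx, u64 vaddr, struct hl_vm_phys_pg_pack *pack)
{
        struct hl_device *hdev = ctx->hdev;
        int rc;

        mutex_lock(&hdev->mmu_lock);

        rc = map_phys_pg_pack(ctx, vaddr, pack);
        if (!rc)
                rc = hl_mmu_invalidate_cache_range(hdev, false,
                                MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
                                ctx->asid, vaddr, pack->total_size);

        mutex_unlock(&hdev->mmu_lock);

        return rc;
}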
struct hl_device {
        struct pci_dev *pdev;
@@ -3204,6 +3248,7 @@ struct hl_device {
        struct mutex asid_mutex;
        struct mutex send_cpu_message_lock;
        struct mutex debug_lock;
+        struct mutex mmu_lock;
        struct asic_fixed_properties asic_prop;
        const struct hl_asic_funcs *asic_funcs;
        void *asic_specific;
@@ -3242,7 +3287,7 @@ struct hl_device {
        struct multi_cs_completion multi_cs_completion[
                                        MULTI_CS_MAX_USER_CTX];
        struct hl_clk_throttle clk_throttling;
-        struct last_error_session_info last_error;
+        struct hl_error_info captured_err_info;

        struct hl_reset_info reset_info;

@@ -3271,6 +3316,7 @@ struct hl_device {
        u32 edma_binning;
        u16 id;
        u16 id_control;
+        u16 cdev_idx;
        u16 cpu_pci_msb_addr;
        u8 is_in_dram_scrub;
        u8 disabled;
@@ -3300,6 +3346,7 @@ struct hl_device {
        u8 compute_ctx_in_release;
        u8 supports_mmu_prefetch;
        u8 reset_upon_device_release;
+        u8 supports_ctx_switch;

        /* Parameters for bring-up */
        u64 nic_ports_mask;
@@ -3426,15 +3473,18 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
}

uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr);
-void *hl_asic_dma_alloc_coherent(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
-                        gfp_t flag);
-void hl_asic_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr,
-                        dma_addr_t dma_handle);
-void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle);
-void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr);
-void *hl_asic_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags,
-                        dma_addr_t *dma_handle);
-void hl_asic_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr);
+void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
-                        gfp_t flag, const char *caller);
+void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr,
                        dma_addr_t dma_handle, const char *caller);
+void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size,
                        dma_addr_t *dma_handle, const char *caller);
+void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr,
                        const char *caller);
+void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags,
                        dma_addr_t *dma_handle, const char *caller);
+void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
                        const char *caller);
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir);
void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
                        enum dma_data_direction dir);
@@ -3513,6 +3563,7 @@ void hl_sysfs_fini(struct hl_device *hdev);

int hl_hwmon_init(struct hl_device *hdev);
void hl_hwmon_fini(struct hl_device *hdev);
+void hl_hwmon_release_resources(struct hl_device *hdev);

int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg,
                        struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
@@ -3557,7 +3608,7 @@ void hl_hw_block_mem_init(struct hl_ctx *ctx);
void hl_hw_block_mem_fini(struct hl_ctx *ctx);

u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
-                enum hl_va_range_type type, u32 size, u32 alignment);
+                enum hl_va_range_type type, u64 size, u32 alignment);
int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
                        u64 start_addr, u64 size);
int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
@@ -3674,6 +3725,7 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev,
                        struct cpucp_hbm_row_info *info);
int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num);
int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid);
+int hl_fw_send_device_activity(struct hl_device *hdev, bool open);
int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
                        bool is_wc[3]);
int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
@@ -3697,6 +3749,8 @@ int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *va
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value);
long hl_fw_get_max_power(struct hl_device *hdev);
void hl_fw_set_max_power(struct hl_device *hdev);
+int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info,
+                        u32 nonce);
int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value);
@@ -3743,6 +3797,7 @@ struct hl_mmap_mem_buf *
hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg,
                struct hl_mmap_mem_buf_behavior *behavior, gfp_t gfp,
                void *args);
+__printf(2, 3) void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...);
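
The *_caller variants thread the call-site name through to the new DMA-allocation trace events. Presumably the old short names survive as wrapper macros that pass __func__, so existing callers need not change — a sketch of that assumption (not verbatim from the tree):

#define hl_asic_dma_alloc_coherent(hdev, size, dma_handle, flags) \
        hl_asic_dma_alloc_coherent_caller(hdev, size, dma_handle, flags, __func__)
#define hl_asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle) \
        hl_asic_dma_free_coherent_caller(hdev, size, cpu_addr, dma_handle, __func__)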

#ifdef CONFIG_DEBUG_FS

@@ -14,6 +14,9 @@
#include <linux/aer.h>
#include <linux/module.h>

+#define CREATE_TRACE_POINTS
+#include <trace/events/habanalabs.h>
+
#define HL_DRIVER_AUTHOR        "HabanaLabs Kernel Driver Team"

#define HL_DRIVER_DESC          "Driver for HabanaLabs's AI Accelerators"
@@ -27,7 +30,10 @@ static struct class *hl_class;
static DEFINE_IDR(hl_devs_idr);
static DEFINE_MUTEX(hl_devs_idr_lock);

-static int timeout_locked = 30;
+#define HL_DEFAULT_TIMEOUT_LOCKED        30        /* 30 seconds */
+#define GAUDI_DEFAULT_TIMEOUT_LOCKED        600        /* 10 minutes */
+
+static int timeout_locked = HL_DEFAULT_TIMEOUT_LOCKED;
static int reset_on_lockup = 1;
static int memory_scrub;
static ulong boot_error_status_mask = ULONG_MAX;
@@ -55,14 +61,12 @@ MODULE_PARM_DESC(boot_error_status_mask,
#define PCI_IDS_GAUDI_SEC        0x1010

#define PCI_IDS_GAUDI2                0x1020
-#define PCI_IDS_GAUDI2_SEC        0x1030

static const struct pci_device_id ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
        { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
        { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), },
        { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), },
-        { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2_SEC), },
        { 0, }
};
MODULE_DEVICE_TABLE(pci, ids);
@@ -92,9 +96,6 @@ static enum hl_asic_type get_asic_type(u16 device)
        case PCI_IDS_GAUDI2:
                asic_type = ASIC_GAUDI2;
                break;
-        case PCI_IDS_GAUDI2_SEC:
-                asic_type = ASIC_GAUDI2_SEC;
-                break;
        default:
                asic_type = ASIC_INVALID;
                break;
@@ -107,7 +108,6 @@ static bool is_asic_secured(enum hl_asic_type asic_type)
{
        switch (asic_type) {
        case ASIC_GAUDI_SEC:
-        case ASIC_GAUDI2_SEC:
                return true;
        default:
                return false;
@@ -161,7 +161,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
        mutex_lock(&hdev->fpriv_list_lock);

        if (!hl_device_operational(hdev, &status)) {
-                dev_err_ratelimited(hdev->dev,
+                dev_dbg_ratelimited(hdev->dev,
                        "Can't open %s because it is %s\n",
                        dev_name(hdev->dev), hdev->status[status]);

@@ -207,11 +207,13 @@ int hl_device_open(struct inode *inode, struct file *filp)
        list_add(&hpriv->dev_node, &hdev->fpriv_list);
        mutex_unlock(&hdev->fpriv_list_lock);

+        hdev->asic_funcs->send_device_activity(hdev, true);
+
        hl_debugfs_add_file(hpriv);

-        atomic_set(&hdev->last_error.cs_timeout.write_enable, 1);
-        atomic_set(&hdev->last_error.razwi.write_enable, 1);
-        hdev->last_error.undef_opcode.write_enable = true;
+        atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1);
+        atomic_set(&hdev->captured_err_info.razwi.write_enable, 1);
+        hdev->captured_err_info.undef_opcode.write_enable = true;

        hdev->open_counter++;
        hdev->last_successful_open_jif = jiffies;
@@ -269,7 +271,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
        mutex_lock(&hdev->fpriv_ctrl_list_lock);

        if (!hl_device_operational(hdev, NULL)) {
-                dev_err_ratelimited(hdev->dev_ctrl,
+                dev_dbg_ratelimited(hdev->dev_ctrl,
                        "Can't open %s because it is disabled or in reset\n",
                        dev_name(hdev->dev_ctrl));
                rc = -EPERM;
@@ -314,12 +316,22 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev)
        hdev->boot_error_status_mask = boot_error_status_mask;
}

-static void fixup_device_params_per_asic(struct hl_device *hdev)
+static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout)
{
        switch (hdev->asic_type) {
-        case ASIC_GOYA:
        case ASIC_GAUDI:
        case ASIC_GAUDI_SEC:
+                /* If user didn't request a different timeout than the default one, we have
+                 * a different default timeout for Gaudi
+                 */
+                if (timeout == HL_DEFAULT_TIMEOUT_LOCKED)
+                        hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED *
+                                                                        MSEC_PER_SEC);
+
                hdev->reset_upon_device_release = 0;
                break;

+        case ASIC_GOYA:
+                hdev->reset_upon_device_release = 0;
+                break;

@@ -339,7 +351,7 @@ static int fixup_device_params(struct hl_device *hdev)
        hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;

        if (tmp_timeout)
-                hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * 1000);
+                hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC);
        else
                hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;

@@ -360,7 +372,7 @@ static int fixup_device_params(struct hl_device *hdev)
        if (!hdev->cpu_queues_enable)
                hdev->heartbeat = 0;

-        fixup_device_params_per_asic(hdev);
+        fixup_device_params_per_asic(hdev, tmp_timeout);

        return 0;
}

@@ -14,6 +14,7 @@
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>

static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
        [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
@@ -103,6 +104,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)

        hw_ip.edma_enabled_mask = prop->edma_enabled_mask;
        hw_ip.server_type = prop->server_type;
+        hw_ip.security_enabled = prop->fw_security_enabled;

        return copy_to_user(out, &hw_ip,
                min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0;
@@ -591,8 +593,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
        if ((!max_size) || (!out))
                return -EINVAL;

-        info.seq = hdev->last_error.cs_timeout.seq;
-        info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp);
+        info.seq = hdev->captured_err_info.cs_timeout.seq;
+        info.timestamp = ktime_to_ns(hdev->captured_err_info.cs_timeout.timestamp);

        return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
@@ -607,12 +609,12 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
        if ((!max_size) || (!out))
                return -EINVAL;

-        info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp);
-        info.addr = hdev->last_error.razwi.addr;
-        info.engine_id_1 = hdev->last_error.razwi.engine_id_1;
-        info.engine_id_2 = hdev->last_error.razwi.engine_id_2;
-        info.no_engine_id = hdev->last_error.razwi.non_engine_initiator;
-        info.error_type = hdev->last_error.razwi.type;
+        info.timestamp = ktime_to_ns(hdev->captured_err_info.razwi.timestamp);
+        info.addr = hdev->captured_err_info.razwi.addr;
+        info.engine_id_1 = hdev->captured_err_info.razwi.engine_id_1;
+        info.engine_id_2 = hdev->captured_err_info.razwi.engine_id_2;
+        info.no_engine_id = hdev->captured_err_info.razwi.non_engine_initiator;
+        info.error_type = hdev->captured_err_info.razwi.type;

        return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
@@ -627,13 +629,13 @@ static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *ar
        if ((!max_size) || (!out))
                return -EINVAL;

-        info.timestamp = ktime_to_ns(hdev->last_error.undef_opcode.timestamp);
-        info.engine_id = hdev->last_error.undef_opcode.engine_id;
-        info.cq_addr = hdev->last_error.undef_opcode.cq_addr;
-        info.cq_size = hdev->last_error.undef_opcode.cq_size;
-        info.stream_id = hdev->last_error.undef_opcode.stream_id;
-        info.cb_addr_streams_len = hdev->last_error.undef_opcode.cb_addr_streams_len;
-        memcpy(info.cb_addr_streams, hdev->last_error.undef_opcode.cb_addr_streams,
+        info.timestamp = ktime_to_ns(hdev->captured_err_info.undef_opcode.timestamp);
+        info.engine_id = hdev->captured_err_info.undef_opcode.engine_id;
+        info.cq_addr = hdev->captured_err_info.undef_opcode.cq_addr;
+        info.cq_size = hdev->captured_err_info.undef_opcode.cq_size;
+        info.stream_id = hdev->captured_err_info.undef_opcode.stream_id;
+        info.cb_addr_streams_len = hdev->captured_err_info.undef_opcode.cb_addr_streams_len;
+        memcpy(info.cb_addr_streams, hdev->captured_err_info.undef_opcode.cb_addr_streams,
                sizeof(info.cb_addr_streams));

        return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
@@ -660,6 +662,55 @@ static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_
        return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}

+static int sec_attest_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+        void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+        struct cpucp_sec_attest_info *sec_attest_info;
+        struct hl_info_sec_attest *info;
+        u32 max_size = args->return_size;
+        int rc;
+
+        if ((!max_size) || (!out))
+                return -EINVAL;
+
+        sec_attest_info = kmalloc(sizeof(*sec_attest_info), GFP_KERNEL);
+        if (!sec_attest_info)
+                return -ENOMEM;
+
+        info = kmalloc(sizeof(*info), GFP_KERNEL);
+        if (!info) {
+                rc = -ENOMEM;
+                goto free_sec_attest_info;
+        }
+
+        rc = hl_fw_get_sec_attest_info(hpriv->hdev, sec_attest_info, args->sec_attest_nonce);
+        if (rc)
+                goto free_info;
+
+        info->nonce = le32_to_cpu(sec_attest_info->nonce);
+        info->pcr_quote_len = le16_to_cpu(sec_attest_info->pcr_quote_len);
+        info->pub_data_len = le16_to_cpu(sec_attest_info->pub_data_len);
+        info->certificate_len = le16_to_cpu(sec_attest_info->certificate_len);
+        info->pcr_num_reg = sec_attest_info->pcr_num_reg;
+        info->pcr_reg_len = sec_attest_info->pcr_reg_len;
+        info->quote_sig_len = sec_attest_info->quote_sig_len;
+        memcpy(&info->pcr_data, &sec_attest_info->pcr_data, sizeof(info->pcr_data));
+        memcpy(&info->pcr_quote, &sec_attest_info->pcr_quote, sizeof(info->pcr_quote));
+        memcpy(&info->public_data, &sec_attest_info->public_data, sizeof(info->public_data));
+        memcpy(&info->certificate, &sec_attest_info->certificate, sizeof(info->certificate));
+        memcpy(&info->quote_sig, &sec_attest_info->quote_sig, sizeof(info->quote_sig));
+
+        rc = copy_to_user(out, info,
+                        min_t(size_t, max_size, sizeof(*info))) ? -EFAULT : 0;
+
+free_info:
+        kfree(info);
+free_sec_attest_info:
+        kfree(sec_attest_info);
+
+        return rc;
+}
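
From user space the new op is reached through the regular INFO ioctl. A hedged sketch of a caller — field and constant names follow the uapi visible in this diff, but the exact header layout should be checked against include/uapi/misc/habanalabs.h:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>

int query_attestation(int fd, struct hl_info_sec_attest *out, unsigned int nonce)
{
        struct hl_info_args args;

        memset(&args, 0, sizeof(args));
        args.op = HL_INFO_SECURED_ATTESTATION;
        args.return_pointer = (__u64) (uintptr_t) out;
        args.return_size = sizeof(*out);
        args.sec_attest_nonce = nonce;        /* echoed back in out->nonce */

        return ioctl(fd, HL_IOCTL_INFO, &args);
}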

static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
        int rc;
@@ -697,6 +748,42 @@ static int eventfd_unregister(struct hl_fpriv *hpriv, struct hl_info_args *args)
        return 0;
}

+static int engine_status_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+        void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+        u32 status_buf_size = args->return_size;
+        struct hl_device *hdev = hpriv->hdev;
+        struct engines_data eng_data;
+        int rc;
+
+        if ((status_buf_size < SZ_1K) || (status_buf_size > HL_ENGINES_DATA_MAX_SIZE) || (!out))
+                return -EINVAL;
+
+        eng_data.actual_size = 0;
+        eng_data.allocated_buf_size = status_buf_size;
+        eng_data.buf = vmalloc(status_buf_size);
+        if (!eng_data.buf)
+                return -ENOMEM;
+
+        hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data);
+
+        if (eng_data.actual_size > eng_data.allocated_buf_size) {
+                dev_err(hdev->dev,
+                        "Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n",
+                        eng_data.actual_size, status_buf_size);
+                vfree(eng_data.buf);
+                return -ENOMEM;
+        }
+
+        args->user_buffer_actual_size = eng_data.actual_size;
+        rc = copy_to_user(out, eng_data.buf, min_t(size_t, status_buf_size, eng_data.actual_size)) ?
+                                -EFAULT : 0;
+
+        vfree(eng_data.buf);
+
+        return rc;
+}
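
An illustrative caller for HL_INFO_ENGINE_STATUS (mine, not from the tree): the buffer must be between 1 KiB and HL_ENGINES_DATA_MAX_SIZE bytes, and the kernel reports how much of it was actually filled:

char buf[1024];        /* >= 1 KiB, as enforced by the SZ_1K check above */
struct hl_info_args args = {
        .op = HL_INFO_ENGINE_STATUS,
        .return_pointer = (__u64) (uintptr_t) buf,
        .return_size = sizeof(buf),
};

if (!ioctl(fd, HL_IOCTL_INFO, &args))
        printf("%.*s", (int) args.user_buffer_actual_size, buf);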

static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
                                struct device *dev)
{
@@ -806,12 +893,18 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
        case HL_INFO_DRAM_PENDING_ROWS:
                return dram_pending_rows_info(hpriv, args);

+        case HL_INFO_SECURED_ATTESTATION:
+                return sec_attest_info(hpriv, args);
+
        case HL_INFO_REGISTER_EVENTFD:
                return eventfd_register(hpriv, args);

        case HL_INFO_UNREGISTER_EVENTFD:
                return eventfd_unregister(hpriv, args);

+        case HL_INFO_ENGINE_STATUS:
+                return engine_status_info(hpriv, args);
+
        default:
                dev_err(dev, "Invalid request %d\n", args->op);
                rc = -EINVAL;

@@ -826,9 +826,7 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,

        q->kernel_address = p;

-        q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
-                                        sizeof(*q->shadow_queue),
-                                        GFP_KERNEL);
+        q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL);
        if (!q->shadow_queue) {
                dev_err(hdev->dev,
                        "Failed to allocate shadow queue for H/W queue %d\n",

@@ -194,7 +194,8 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sen
                curr_arr[sensors_by_type_next_index[type]++] = flags;
        }

-        channels_info = kcalloc(num_active_sensor_types + 1, sizeof(*channels_info), GFP_KERNEL);
+        channels_info = kcalloc(num_active_sensor_types + 1, sizeof(struct hwmon_channel_info *),
+                                        GFP_KERNEL);
        if (!channels_info) {
                rc = -ENOMEM;
                goto channels_info_array_err;
@@ -910,3 +911,24 @@ void hl_hwmon_fini(struct hl_device *hdev)

        hwmon_device_unregister(hdev->hwmon_dev);
}
+
+void hl_hwmon_release_resources(struct hl_device *hdev)
+{
+        const struct hwmon_channel_info **channel_info_arr;
+        int i = 0;
+
+        if (!hdev->hl_chip_info->info)
+                return;
+
+        channel_info_arr = hdev->hl_chip_info->info;
+
+        while (channel_info_arr[i]) {
+                kfree(channel_info_arr[i]->config);
+                kfree(channel_info_arr[i]);
+                i++;
+        }
+
+        kfree(channel_info_arr);
+
+        hdev->hl_chip_info->info = NULL;
+}
@@ -457,7 +457,7 @@ static void merge_va_blocks_locked(struct hl_device *hdev,
        prev = list_prev_entry(va_block, node);
        if (&prev->node != va_list && prev->end + 1 == va_block->start) {
                prev->end = va_block->end;
-                prev->size = prev->end - prev->start;
+                prev->size = prev->end - prev->start + 1;
                list_del(&va_block->node);
                kfree(va_block);
                va_block = prev;
@@ -466,7 +466,7 @@ static void merge_va_blocks_locked(struct hl_device *hdev,
        next = list_next_entry(va_block, node);
        if (&next->node != va_list && va_block->end + 1 == next->start) {
                next->start = va_block->start;
-                next->size = next->end - next->start;
+                next->size = next->end - next->start + 1;
                list_del(&va_block->node);
                kfree(va_block);
        }
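
The +1 matters because va-block ranges are inclusive on both ends, so the old arithmetic left the merged size one byte short. A quick worked example (my numbers, not from the patch):

/* Inclusive range [start, end]: a block covering 0x1000..0x1fff is
 * exactly 0x1000 bytes, so size must be end - start + 1.
 */
u64 start = 0x1000, end = 0x1fff;
u64 wrong = end - start;        /* 0xfff  - one byte short */
u64 right = end - start + 1;        /* 0x1000 - the real block size */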

@@ -755,7 +755,7 @@ out:
 * - Return the start address of the virtual block.
 */
u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
-                enum hl_va_range_type type, u32 size, u32 alignment)
+                enum hl_va_range_type type, u64 size, u32 alignment)
{
        return get_va_block(hdev, ctx->va_range[type], size, 0,
                        max(alignment, ctx->va_range[type]->page_size),
@@ -1210,18 +1210,18 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device
                goto va_block_err;
        }

-        mutex_lock(&ctx->mmu_lock);
+        mutex_lock(&hdev->mmu_lock);

        rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
        if (rc) {
                dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle);
-                mutex_unlock(&ctx->mmu_lock);
+                mutex_unlock(&hdev->mmu_lock);
                goto map_err;
        }

        rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV,
                                ctx->asid, ret_vaddr, phys_pg_pack->total_size);
-        mutex_unlock(&ctx->mmu_lock);
+        mutex_unlock(&hdev->mmu_lock);
        if (rc)
                goto map_err;

@@ -1362,7 +1362,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
        else
                vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);

-        mutex_lock(&ctx->mmu_lock);
+        mutex_lock(&hdev->mmu_lock);

        unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);

@@ -1375,7 +1375,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
        rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr,
                                phys_pg_pack->total_size);

-        mutex_unlock(&ctx->mmu_lock);
+        mutex_unlock(&hdev->mmu_lock);

        /*
         * If the context is closing we don't need to check for the MMU cache
@@ -1418,18 +1418,23 @@ vm_type_err:
        return rc;
}

-static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
-                        u32 *size)
+static int map_block(struct hl_device *hdev, u64 address, u64 *handle, u32 *size)
{
-        u32 block_id = 0;
+        u32 block_id;
        int rc;

+        *handle = 0;
+        if (size)
+                *size = 0;
+
        rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);
        if (rc)
                return rc;

        *handle = block_id | HL_MMAP_TYPE_BLOCK;
        *handle <<= PAGE_SHIFT;

-        return rc;
+        return 0;
}
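
The handle packing above ORs a type flag into the block id and shifts by PAGE_SHIFT so the handle can travel through mmap()'s offset argument, which the kernel sees divided back down as vm_pgoff. A hedged sketch of the inverse (names as in this diff; the tree's actual unpack helpers may differ):

u64 pgoff = handle >> PAGE_SHIFT;                /* == mmap offset / PAGE_SIZE */
bool is_block = pgoff & HL_MMAP_TYPE_BLOCK;        /* type bits ride above the id */
u32 block_id = pgoff & ~HL_MMAP_TYPE_BLOCK;        /* recover the HW block id */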

static void hw_block_vm_close(struct vm_area_struct *vma)
@@ -1437,6 +1442,13 @@ static void hw_block_vm_close(struct vm_area_struct *vma)
        struct hl_vm_hw_block_list_node *lnode =
                (struct hl_vm_hw_block_list_node *) vma->vm_private_data;
        struct hl_ctx *ctx = lnode->ctx;
+        long new_mmap_size;
+
+        new_mmap_size = lnode->mapped_size - (vma->vm_end - vma->vm_start);
+        if (new_mmap_size > 0) {
+                lnode->mapped_size = new_mmap_size;
+                return;
+        }

        mutex_lock(&ctx->hw_block_list_lock);
        list_del(&lnode->node);
@@ -1487,23 +1499,23 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
        if (!lnode)
                return -ENOMEM;

-        vma->vm_ops = &hw_block_vm_ops;
-        vma->vm_private_data = lnode;
-
-        hl_ctx_get(ctx);
-
        rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
        if (rc) {
-                hl_ctx_put(ctx);
                kfree(lnode);
                return rc;
        }

+        hl_ctx_get(ctx);
+
        lnode->ctx = ctx;
        lnode->vaddr = vma->vm_start;
-        lnode->size = block_size;
+        lnode->block_size = block_size;
+        lnode->mapped_size = lnode->block_size;
        lnode->id = block_id;

+        vma->vm_private_data = lnode;
+        vma->vm_ops = &hw_block_vm_ops;
+
        mutex_lock(&ctx->hw_block_list_lock);
        list_add_tail(&lnode->node, &ctx->hw_block_mem_list);
        mutex_unlock(&ctx->hw_block_list_lock);
@@ -2296,8 +2308,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
                return -EFAULT;
        }

-        userptr->pages = kvmalloc_array(npages, sizeof(*userptr->pages),
-                                        GFP_KERNEL);
+        userptr->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
        if (!userptr->pages)
                return -ENOMEM;

@@ -2759,13 +2770,13 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
                unmap_device_va(ctx, &args, true);
        }

-        mutex_lock(&ctx->mmu_lock);
+        mutex_lock(&hdev->mmu_lock);

        /* invalidate the cache once after the unmapping loop */
        hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
        hl_mmu_invalidate_cache(hdev, true, MMU_OP_PHYS_PACK);

-        mutex_unlock(&ctx->mmu_lock);
+        mutex_unlock(&hdev->mmu_lock);

        INIT_LIST_HEAD(&free_list);

@@ -11,7 +11,7 @@
 * hl_mmap_mem_buf_get - increase the buffer refcount and return a pointer to
 * the buffer descriptor.
 *
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
 * @handle: requested buffer handle
 *
 * Find the buffer in the store and return a pointer to its descriptor.
@@ -104,7 +104,7 @@ int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf)
 * hl_mmap_mem_buf_put_handle - decrease the reference to the buffer with the
 * given handle.
 *
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
 * @handle: requested buffer handle
 *
 * Decrease the reference to the buffer, and release it if it was the last one.
@@ -137,7 +137,7 @@ int hl_mmap_mem_buf_put_handle(struct hl_mem_mgr *mmg, u64 handle)
/**
 * hl_mmap_mem_buf_alloc - allocate a new mappable buffer
 *
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
 * @behavior: behavior object describing this buffer polymorphic behavior
 * @gfp: gfp flags to use for the memory allocations
 * @args: additional args passed to behavior->alloc
@@ -222,7 +222,7 @@ static const struct vm_operations_struct hl_mmap_mem_buf_vm_ops = {
/**
 * hl_mem_mgr_mmap - map the given buffer to the user
 *
- * @mmg: unifed memory manager
+ * @mmg: unified memory manager
 * @vma: the vma object for which mmap was closed.
 * @args: additional args passed to behavior->mmap
 *
@@ -322,7 +322,7 @@ void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg)
/**
 * hl_mem_mgr_fini - release unified memory manager
 *
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
 *
 * Release the unified memory manager. Shall be called from an interrupt context.
 */

@@ -9,6 +9,8 @@

#include "../habanalabs.h"

+#include <trace/events/habanalabs.h>
+
/**
 * hl_mmu_get_funcs() - get MMU functions structure
 * @hdev: habanalabs device structure.
@@ -45,6 +47,8 @@ int hl_mmu_init(struct hl_device *hdev)
        if (!hdev->mmu_enable)
                return 0;

+        mutex_init(&hdev->mmu_lock);
+
        if (hdev->mmu_func[MMU_DR_PGT].init != NULL) {
                rc = hdev->mmu_func[MMU_DR_PGT].init(hdev);
                if (rc)
@@ -86,6 +90,8 @@ void hl_mmu_fini(struct hl_device *hdev)

        if (hdev->mmu_func[MMU_HR_PGT].fini != NULL)
                hdev->mmu_func[MMU_HR_PGT].fini(hdev);
+
+        mutex_destroy(&hdev->mmu_lock);
}

/**
@@ -104,8 +110,6 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx)
        if (!hdev->mmu_enable)
                return 0;

-        mutex_init(&ctx->mmu_lock);
-
        if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) {
                rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx);
                if (rc)
@@ -149,8 +153,6 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)

        if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL)
                hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx);
-
-        mutex_destroy(&ctx->mmu_lock);
}

/*
@@ -259,6 +261,9 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flu
        if (flush_pte)
                mmu_funcs->flush(ctx);

+        if (trace_habanalabs_mmu_unmap_enabled() && !rc)
+                trace_habanalabs_mmu_unmap(hdev->dev, virt_addr, 0, page_size, flush_pte);
+
        return rc;
}

@@ -344,6 +349,8 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_s
        if (flush_pte)
                mmu_funcs->flush(ctx);

+        trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr, page_size, flush_pte);
+
        return 0;

err:
@@ -403,6 +410,8 @@ int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
                        dev_err(hdev->dev,
                                "Map failed for va 0x%llx to pa 0x%llx\n",
                                curr_va, curr_pa);
+                        /* last mapping failed so don't try to unmap it - reduce off by page_size */
+                        off -= page_size;
                        goto unmap;
                }
        }
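
The event definitions for these MMU tracepoints live in the new trace/events/habanalabs.h; note the unmap path wraps its call in trace_habanalabs_mmu_unmap_enabled() so the arguments are only evaluated when tracing is active. A hedged sketch of what the map event's TRACE_EVENT might look like, inferred purely from the call site — the real field names and format string may differ:

TRACE_EVENT(habanalabs_mmu_map,
        TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte),
        TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte),
        TP_STRUCT__entry(
                __string(dname, dev_name(dev))
                __field(u64, virt_addr)
                __field(u64, phys_addr)
                __field(u32, page_size)
                __field(bool, flush_pte)
        ),
        TP_fast_assign(
                __assign_str(dname, dev_name(dev));
                __entry->virt_addr = virt_addr;
                __entry->phys_addr = phys_addr;
                __entry->page_size = page_size;
                __entry->flush_pte = flush_pte;
        ),
        TP_printk("%s: v=%#llx p=%#llx size=%#x flush=%d",
                __get_str(dname), __entry->virt_addr, __entry->phys_addr,
                __entry->page_size, __entry->flush_pte)
);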

@@ -600,9 +609,9 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
        pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
        mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr);

-        mutex_lock(&ctx->mmu_lock);
+        mutex_lock(&hdev->mmu_lock);
        rc = mmu_funcs->get_tlb_info(ctx, virt_addr, hops);
-        mutex_unlock(&ctx->mmu_lock);
+        mutex_unlock(&hdev->mmu_lock);

        if (rc)
                return rc;
@@ -692,16 +701,16 @@ static void hl_mmu_prefetch_work_function(struct work_struct *work)
{
        struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work);
        struct hl_ctx *ctx = pfw->ctx;
+        struct hl_device *hdev = ctx->hdev;

-        if (!hl_device_operational(ctx->hdev, NULL))
+        if (!hl_device_operational(hdev, NULL))
                goto put_ctx;

-        mutex_lock(&ctx->mmu_lock);
+        mutex_lock(&hdev->mmu_lock);

-        ctx->hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid,
-                        pfw->va, pfw->size);
+        hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, pfw->va, pfw->size);

-        mutex_unlock(&ctx->mmu_lock);
+        mutex_unlock(&hdev->mmu_lock);

put_ctx:
        /*

@@ -375,6 +375,14 @@ out:
        return max_size;
}

+static ssize_t security_enabled_show(struct device *dev,
+                struct device_attribute *attr, char *buf)
+{
+        struct hl_device *hdev = dev_get_drvdata(dev);
+
+        return sprintf(buf, "%d\n", hdev->asic_prop.fw_security_enabled);
+}
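
From user space the new attribute reads back as "0" or "1". An illustrative check (error handling elided; needs <fcntl.h>, <stdio.h> and <unistd.h>; hl0 stands for whichever device instance is being queried):

char val = '0';
int fd = open("/sys/class/habanalabs/hl0/security_enabled", O_RDONLY);

if (fd >= 0 && read(fd, &val, 1) == 1)
        printf("secured firmware: %s\n", val == '1' ? "yes" : "no");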

static DEVICE_ATTR_RO(armcp_kernel_ver);
static DEVICE_ATTR_RO(armcp_ver);
static DEVICE_ATTR_RO(cpld_ver);
@@ -393,6 +401,7 @@ static DEVICE_ATTR_RO(status);
static DEVICE_ATTR_RO(thermal_ver);
static DEVICE_ATTR_RO(uboot_ver);
static DEVICE_ATTR_RO(fw_os_ver);
+static DEVICE_ATTR_RO(security_enabled);

static struct bin_attribute bin_attr_eeprom = {
        .attr = {.name = "eeprom", .mode = (0444)},
@@ -417,6 +426,7 @@ static struct attribute *hl_dev_attrs[] = {
        &dev_attr_thermal_ver.attr,
        &dev_attr_uboot_ver.attr,
        &dev_attr_fw_os_ver.attr,
+        &dev_attr_security_enabled.attr,
        NULL,
};

@@ -899,12 +899,13 @@ static int gaudi_early_fini(struct hl_device *hdev)
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
-        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
+        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
        int rc;

-        if (hdev->asic_prop.fw_security_enabled) {
+        if ((hdev->fw_components & FW_TYPE_LINUX) &&
+                        (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
                struct gaudi_device *gaudi = hdev->asic_specific;

                if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
@@ -939,9 +940,7 @@ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
                else
                        freq = pll_clk / (div_fctr + 1);
        } else {
-                dev_warn(hdev->dev,
-                        "Received invalid div select value: %d",
-                        div_sel);
+                dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
                freq = 0;
        }
}
@@ -985,9 +984,10 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,
        init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

        init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
-        dst_addr = (prop->sram_user_base_address &
-                        GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
-                        GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
+
+        /* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
+        dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
+                                round_up(prop->sram_user_base_address, SZ_8K));
        init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
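
round_up() pads the SRAM address to the next 8 KiB boundary before FIELD_PREP() shifts it into the LIN_DMA destination field. A quick check of the alignment arithmetic with example values of my own:

u64 base = 0x7ff0a001;                        /* arbitrary unaligned address */
u64 aligned = round_up(base, SZ_8K);        /* 0x7ff0c000 - 8 KiB aligned */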

        job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
@@ -1683,23 +1683,7 @@ disable_pci_access:

static void gaudi_late_fini(struct hl_device *hdev)
{
-        const struct hwmon_channel_info **channel_info_arr;
-        int i = 0;
-
-        if (!hdev->hl_chip_info->info)
-                return;
-
-        channel_info_arr = hdev->hl_chip_info->info;
-
-        while (channel_info_arr[i]) {
-                kfree(channel_info_arr[i]->config);
-                kfree(channel_info_arr[i]);
-                i++;
-        }
-
-        kfree(channel_info_arr);
-
-        hdev->hl_chip_info->info = NULL;
+        hl_hwmon_release_resources(hdev);
}

static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
@@ -4723,7 +4707,7 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev)
        addr = prop->sram_user_base_address;
        size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;

-        dev_dbg(hdev->dev, "Scrubing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
+        dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
                addr, addr + size, val);
        rc = gaudi_memset_device_memory(hdev, addr, size, val);
        if (rc) {
@@ -6911,9 +6895,9 @@ static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 strea
                stream, cq_ptr, size);

        if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
-                hdev->last_error.undef_opcode.cq_addr = cq_ptr;
-                hdev->last_error.undef_opcode.cq_size = size;
-                hdev->last_error.undef_opcode.stream_id = stream;
+                hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
+                hdev->captured_err_info.undef_opcode.cq_size = size;
+                hdev->captured_err_info.undef_opcode.stream_id = stream;
        }
}

@@ -6979,7 +6963,7 @@ static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
        }

        if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
-                struct undefined_opcode_info *undef_opcode = &hdev->last_error.undef_opcode;
+                struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
                u32 arr_idx = undef_opcode->cb_addr_streams_len;

                if (arr_idx == 0) {
@@ -7063,11 +7047,11 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
        }
        /* check for undefined opcode */
        if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
-                        hdev->last_error.undef_opcode.write_enable) {
-                memset(&hdev->last_error.undef_opcode, 0,
-                        sizeof(hdev->last_error.undef_opcode));
+                        hdev->captured_err_info.undef_opcode.write_enable) {
+                memset(&hdev->captured_err_info.undef_opcode, 0,
+                        sizeof(hdev->captured_err_info.undef_opcode));

-                hdev->last_error.undef_opcode.write_enable = false;
+                hdev->captured_err_info.undef_opcode.write_enable = false;
                *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
        }

@@ -7233,12 +7217,6 @@ static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e

        switch (event_type) {
        case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
-                /* In TPC QM event, notify on TPC assertion. While there isn't
-                 * a specific event for assertion yet, the FW generates QM event.
-                 * The SW upper layer will inspect an internal mapped area to indicate
-                 * if the event is a tpc assertion or tpc QM.
-                 */
-                *event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
                index = event_type - GAUDI_EVENT_TPC0_QM;
                qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
                qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
@@ -7349,18 +7327,19 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
        gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);

        /* In case it's the first razwi, save its parameters*/
-        rc = atomic_cmpxchg(&hdev->last_error.razwi.write_enable, 1, 0);
+        rc = atomic_cmpxchg(&hdev->captured_err_info.razwi.write_enable, 1, 0);
        if (rc) {
-                hdev->last_error.razwi.timestamp = ktime_get();
-                hdev->last_error.razwi.addr = razwi_addr;
-                hdev->last_error.razwi.engine_id_1 = engine_id_1;
-                hdev->last_error.razwi.engine_id_2 = engine_id_2;
+                hdev->captured_err_info.razwi.timestamp = ktime_get();
+                hdev->captured_err_info.razwi.addr = razwi_addr;
+                hdev->captured_err_info.razwi.engine_id_1 = engine_id_1;
+                hdev->captured_err_info.razwi.engine_id_2 = engine_id_2;
                /*
                 * If first engine id holds non valid value the razwi initiator
                 * does not have engine id
                 */
-                hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX);
-                hdev->last_error.razwi.type = razwi_type;
+                hdev->captured_err_info.razwi.non_engine_initiator =
+                                        (engine_id_1 == U16_MAX);
+                hdev->captured_err_info.razwi.type = razwi_type;

        }
}
@@ -7427,7 +7406,7 @@ static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
                event_type, desc);
}

-static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
+static int gaudi_compute_reset_late_init(struct hl_device *hdev)
{
        /* GAUDI doesn't support any reset except hard-reset */
        return -EPERM;
@@ -7702,6 +7681,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
        case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
                gaudi_print_irq_info(hdev, event_type, true);
                gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
+                event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
                fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
                goto reset_device;

@@ -7711,6 +7691,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
        case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
                gaudi_print_irq_info(hdev, event_type, false);
                fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
+                event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
                goto reset_device;

        case GAUDI_EVENT_HBM0_SPI_0:
@@ -7722,6 +7703,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
                        gaudi_hbm_event_to_dev(event_type),
                        &eq_entry->hbm_ecc_data);
                fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
+                event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
                goto reset_device;

        case GAUDI_EVENT_HBM0_SPI_1:
@@ -7733,6 +7715,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
                        gaudi_hbm_event_to_dev(event_type),
                        &eq_entry->hbm_ecc_data);
                hl_fw_unmask_irq(hdev, event_type);
+                event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
                break;

        case GAUDI_EVENT_TPC0_DEC:
@@ -7743,10 +7726,17 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
        case GAUDI_EVENT_TPC5_DEC:
        case GAUDI_EVENT_TPC6_DEC:
        case GAUDI_EVENT_TPC7_DEC:
+                /* In TPC DEC event, notify on TPC assertion. While there isn't
+                 * a specific event for assertion yet, the FW generates TPC DEC event.
+                 * The SW upper layer will inspect an internal mapped area to indicate
+                 * if the event is a TPC Assertion or a "real" TPC DEC.
+                 */
+                event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
                gaudi_print_irq_info(hdev, event_type, true);
                reset_required = gaudi_tpc_read_interrupts(hdev,
                                        tpc_dec_event_to_tpc_id(event_type),
                                        "AXI_SLV_DEC_Error");
+                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                if (reset_required) {
                        dev_err(hdev->dev, "reset required due to %s\n",
                                gaudi_irq_map_table[event_type].name);
@@ -7755,6 +7745,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
                        goto reset_device;
                } else {
                        hl_fw_unmask_irq(hdev, event_type);
+                        event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
                }
                break;

@@ -7770,6 +7761,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
                reset_required = gaudi_tpc_read_interrupts(hdev,
                                        tpc_krn_event_to_tpc_id(event_type),
                                        "KRN_ERR");
+                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                if (reset_required) {
                        dev_err(hdev->dev, "reset required due to %s\n",
                                gaudi_irq_map_table[event_type].name);
@@ -7778,6 +7770,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
                        goto reset_device;
                } else {
                        hl_fw_unmask_irq(hdev, event_type);
+                        event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
                }
                break;

@@ -7806,9 +7799,25 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
                gaudi_print_irq_info(hdev, event_type, true);
                gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
                hl_fw_unmask_irq(hdev, event_type);
+                event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
                break;

        case GAUDI_EVENT_PCIE_DEC:
+        case GAUDI_EVENT_CPU_AXI_SPLITTER:
+        case GAUDI_EVENT_PSOC_AXI_DEC:
+        case GAUDI_EVENT_PSOC_PRSTN_FALL:
+                gaudi_print_irq_info(hdev, event_type, true);
+                hl_fw_unmask_irq(hdev, event_type);
+                event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
+                break;

+        case GAUDI_EVENT_MMU_PAGE_FAULT:
+        case GAUDI_EVENT_MMU_WR_PERM:
+                gaudi_print_irq_info(hdev, event_type, true);
+                hl_fw_unmask_irq(hdev, event_type);
+                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
+                break;
+
        case GAUDI_EVENT_MME0_WBC_RSP:
        case GAUDI_EVENT_MME0_SBAB0_RSP:
        case GAUDI_EVENT_MME1_WBC_RSP:
@@ -7817,11 +7826,6 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
        case GAUDI_EVENT_MME2_SBAB0_RSP:
        case GAUDI_EVENT_MME3_WBC_RSP:
        case GAUDI_EVENT_MME3_SBAB0_RSP:
-        case GAUDI_EVENT_CPU_AXI_SPLITTER:
-        case GAUDI_EVENT_PSOC_AXI_DEC:
-        case GAUDI_EVENT_PSOC_PRSTN_FALL:
-        case GAUDI_EVENT_MMU_PAGE_FAULT:
-        case GAUDI_EVENT_MMU_WR_PERM:
        case GAUDI_EVENT_RAZWI_OR_ADC:
        case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
        case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
@@ -7841,10 +7845,12 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
                gaudi_print_irq_info(hdev, event_type, true);
                gaudi_handle_qman_err(hdev, event_type, &event_mask);
                hl_fw_unmask_irq(hdev, event_type);
+                event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
                break;

        case GAUDI_EVENT_RAZWI_OR_ADC_SW:
                gaudi_print_irq_info(hdev, event_type, true);
+                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                goto reset_device;

        case GAUDI_EVENT_TPC0_BMON_SPMU:
@@ -7858,11 +7864,13 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
        case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
                gaudi_print_irq_info(hdev, event_type, false);
                hl_fw_unmask_irq(hdev, event_type);
+                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                break;

        case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
                gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
                hl_fw_unmask_irq(hdev, event_type);
+                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                break;

        case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
@@ -7870,6 +7878,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
                gaudi_print_sm_sei_info(hdev, event_type,
                                &eq_entry->sm_sei_data);
                rc = hl_state_dump(hdev);
+                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                if (rc)
                        dev_err(hdev->dev,
                                "Error during system state dump %d\n", rc);
@@ -7880,6 +7889,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
                break;

        case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
+                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                gaudi_print_clk_change_info(hdev, event_type);
                hl_fw_unmask_irq(hdev, event_type);
                break;
@@ -7889,20 +7899,24 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
                dev_err(hdev->dev,
                        "Received high temp H/W interrupt %d (cause %d)\n",
                        event_type, cause);
+                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                break;

        case GAUDI_EVENT_DEV_RESET_REQ:
                gaudi_print_irq_info(hdev, event_type, false);
+                event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
                goto reset_device;

        case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
                gaudi_print_irq_info(hdev, event_type, false);
                gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
+                event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
                goto reset_device;

        case GAUDI_EVENT_FW_ALIVE_S:
                gaudi_print_irq_info(hdev, event_type, false);
                gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
+                event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
                goto reset_device;

        default:
@@ -8066,8 +8080,8 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
        return 0;
}

-static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
-                        u8 mask_len, struct seq_file *s)
+static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+                        struct engines_data *e)
{
        struct gaudi_device *gaudi = hdev->asic_specific;
        const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
@@ -8079,8 +8093,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
        u64 offset;
        int i, dma_id, port;

-        if (s)
-                seq_puts(s,
|
||||
if (e)
|
||||
hl_engine_data_sprintf(e,
|
||||
"\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
|
||||
"--- ------- ------------ ---------- -------------\n");
|
||||
|
||||
@ -8097,14 +8111,14 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
|
||||
|
||||
if (mask && !is_eng_idle)
|
||||
set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
|
||||
if (s)
|
||||
seq_printf(s, fmt, dma_id,
|
||||
if (e)
|
||||
hl_engine_data_sprintf(e, fmt, dma_id,
|
||||
is_eng_idle ? "Y" : "N", qm_glbl_sts0,
|
||||
qm_cgm_sts, dma_core_sts0);
|
||||
}
|
||||
|
||||
if (s)
|
||||
seq_puts(s,
|
||||
if (e)
|
||||
hl_engine_data_sprintf(e,
|
||||
"\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
|
||||
"--- ------- ------------ ---------- ----------\n");
|
||||
|
||||
@ -8119,14 +8133,14 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
|
||||
|
||||
if (mask && !is_eng_idle)
|
||||
set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
|
||||
if (s)
|
||||
seq_printf(s, fmt, i,
|
||||
if (e)
|
||||
hl_engine_data_sprintf(e, fmt, i,
|
||||
is_eng_idle ? "Y" : "N",
|
||||
qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
|
||||
}
|
||||
|
||||
if (s)
|
||||
seq_puts(s,
|
||||
if (e)
|
||||
hl_engine_data_sprintf(e,
|
||||
"\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
|
||||
"--- ------- ------------ ---------- -----------\n");
|
||||
|
||||
@ -8147,20 +8161,21 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
|
||||
|
||||
if (mask && !is_eng_idle)
|
||||
set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
|
||||
if (s) {
|
||||
if (e) {
|
||||
if (!is_slave)
|
||||
seq_printf(s, fmt, i,
|
||||
hl_engine_data_sprintf(e, fmt, i,
|
||||
is_eng_idle ? "Y" : "N",
|
||||
qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
|
||||
else
|
||||
seq_printf(s, mme_slave_fmt, i,
|
||||
hl_engine_data_sprintf(e, mme_slave_fmt, i,
|
||||
is_eng_idle ? "Y" : "N", "-",
|
||||
"-", mme_arch_sts);
|
||||
}
|
||||
}
|
||||
|
||||
if (s)
|
||||
seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
|
||||
if (e)
|
||||
hl_engine_data_sprintf(e,
|
||||
"\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
|
||||
"--- ------- ------------ ----------\n");
|
||||
|
||||
for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
|
||||
@ -8174,8 +8189,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
|
||||
|
||||
if (mask && !is_eng_idle)
|
||||
set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
|
||||
if (s)
|
||||
seq_printf(s, nic_fmt, port,
|
||||
if (e)
|
||||
hl_engine_data_sprintf(e, nic_fmt, port,
|
||||
is_eng_idle ? "Y" : "N",
|
||||
qm_glbl_sts0, qm_cgm_sts);
|
||||
}
|
||||
@ -8189,15 +8204,15 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
|
||||
|
||||
if (mask && !is_eng_idle)
|
||||
set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
|
||||
if (s)
|
||||
seq_printf(s, nic_fmt, port,
|
||||
if (e)
|
||||
hl_engine_data_sprintf(e, nic_fmt, port,
|
||||
is_eng_idle ? "Y" : "N",
|
||||
qm_glbl_sts0, qm_cgm_sts);
|
||||
}
|
||||
}
|
||||
|
||||
if (s)
|
||||
seq_puts(s, "\n");
|
||||
if (e)
|
||||
hl_engine_data_sprintf(e, "\n");
|
||||
|
||||
return is_idle;
|
||||
}
|
||||
@ -8392,13 +8407,13 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
|
||||
goto destroy_internal_cb_pool;
|
||||
}
|
||||
|
||||
mutex_lock(&ctx->mmu_lock);
|
||||
mutex_lock(&hdev->mmu_lock);
|
||||
rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
|
||||
hdev->internal_cb_pool_dma_addr,
|
||||
HOST_SPACE_INTERNAL_CB_SZ);
|
||||
|
||||
hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
|
||||
mutex_unlock(&ctx->mmu_lock);
|
||||
mutex_unlock(&hdev->mmu_lock);
|
||||
|
||||
if (rc)
|
||||
goto unreserve_internal_cb_pool;
|
||||
@ -8425,13 +8440,13 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
|
||||
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
|
||||
return;
|
||||
|
||||
mutex_lock(&ctx->mmu_lock);
|
||||
mutex_lock(&hdev->mmu_lock);
|
||||
hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
|
||||
HOST_SPACE_INTERNAL_CB_SZ);
|
||||
hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
|
||||
HOST_SPACE_INTERNAL_CB_SZ);
|
||||
hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
|
||||
mutex_unlock(&ctx->mmu_lock);
|
||||
mutex_unlock(&hdev->mmu_lock);
|
||||
|
||||
gen_pool_destroy(hdev->internal_cb_pool);
|
||||
|
||||
@ -9148,6 +9163,11 @@ static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group
|
||||
dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
|
||||
}
|
||||
|
||||
static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct hl_asic_funcs gaudi_funcs = {
|
||||
.early_init = gaudi_early_init,
|
||||
.early_fini = gaudi_early_fini,
|
||||
@ -9192,11 +9212,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
|
||||
.send_heartbeat = gaudi_send_heartbeat,
|
||||
.debug_coresight = gaudi_debug_coresight,
|
||||
.is_device_idle = gaudi_is_device_idle,
|
||||
.non_hard_reset_late_init = gaudi_non_hard_reset_late_init,
|
||||
.compute_reset_late_init = gaudi_compute_reset_late_init,
|
||||
.hw_queues_lock = gaudi_hw_queues_lock,
|
||||
.hw_queues_unlock = gaudi_hw_queues_unlock,
|
||||
.kdma_lock = NULL,
|
||||
.kdma_unlock = NULL,
|
||||
.get_pci_id = gaudi_get_pci_id,
|
||||
.get_eeprom_data = gaudi_get_eeprom_data,
|
||||
.get_monitor_dump = gaudi_get_monitor_dump,
|
||||
@ -9242,6 +9260,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
|
||||
.mmu_get_real_page_size = hl_mmu_get_real_page_size,
|
||||
.access_dev_mem = hl_access_dev_mem,
|
||||
.set_dram_bar_base = gaudi_set_hbm_bar_base,
|
||||
.send_device_activity = gaudi_send_device_activity,
|
||||
};
|
||||
|
||||
/**
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -15,7 +15,6 @@
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_fw_if.h"
#include "../include/gaudi2/gaudi2_async_events.h"
#include "../include/gaudi2/gaudi2_async_virt_events.h"

#define GAUDI2_LINUX_FW_FILE "habanalabs/gaudi2/gaudi2-fit.itb"
#define GAUDI2_BOOT_FIT_FILE "habanalabs/gaudi2/gaudi2-boot-fit.itb"
@@ -140,9 +139,6 @@
#define VA_HOST_SPACE_HPAGE_START 0xFFF0800000000000ull
#define VA_HOST_SPACE_HPAGE_END 0xFFF1000000000000ull /* 140TB */

#define VA_HOST_SPACE_USER_MAPPED_CB_START 0xFFF1000000000000ull
#define VA_HOST_SPACE_USER_MAPPED_CB_END 0xFFF1000100000000ull /* 4GB */

/* 140TB */
#define VA_HOST_SPACE_PAGE_SIZE (VA_HOST_SPACE_PAGE_END - VA_HOST_SPACE_PAGE_START)

@@ -458,7 +454,6 @@ struct dup_block_ctx {
* the user can map.
* @lfsr_rand_seeds: array of MME ACC random seeds to set.
* @hw_queues_lock: protects the H/W queues from concurrent access.
* @kdma_lock: protects the KDMA engine from concurrent access.
* @scratchpad_kernel_address: general purpose PAGE_SIZE contiguous memory,
* this memory region should be write-only.
* currently used for HBW QMAN writes which is
@@ -510,9 +505,6 @@ struct dup_block_ctx {
* @flush_db_fifo: flag to force flush DB FIFO after a write.
* @hbm_cfg: HBM subsystem settings
* @hw_queues_lock_mutex: used by simulator instead of hw_queues_lock.
* @kdma_lock_mutex: used by simulator instead of kdma_lock.
* @use_deprecated_event_mappings: use old event mappings which are about to be
* deprecated
*/
struct gaudi2_device {
int (*cpucp_info_get)(struct hl_device *hdev);
@@ -521,7 +513,6 @@ struct gaudi2_device {
int lfsr_rand_seeds[MME_NUM_OF_LFSR_SEEDS];

spinlock_t hw_queues_lock;
spinlock_t kdma_lock;

void *scratchpad_kernel_address;
dma_addr_t scratchpad_bus_address;
@@ -562,5 +553,6 @@ void gaudi2_pb_print_security_errors(struct hl_device *hdev, u32 block_addr, u32
u32 offended_addr);
int gaudi2_init_security(struct hl_device *hdev);
void gaudi2_ack_protection_bits_errors(struct hl_device *hdev);
int gaudi2_send_device_activity(struct hl_device *hdev, bool open);

#endif /* GAUDI2P_H_ */
@@ -51,12 +51,18 @@
(0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT))

#define PDMA1_QMAN_ENABLE \
#define PDMA0_QMAN_ENABLE \
((0x3 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT))

#define PDMA1_QMAN_ENABLE \
((0x1 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT))

/* QM_IDLE_MASK is valid for all engines QM idle check */
#define QM_IDLE_MASK (DCORE0_EDMA0_QM_GLBL_STS0_PQF_IDLE_MASK | \
DCORE0_EDMA0_QM_GLBL_STS0_CQF_IDLE_MASK | \
@@ -138,4 +144,17 @@
#define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_SHIFT 15
#define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK 0x8000

#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_SHIFT 0
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK 0x1
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_SHIFT 1
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK 0x2
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_SHIFT 2
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK 0x4
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_SHIFT 3
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_MASK 0x8
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_SHIFT 4
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_MASK 0x10
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_SHIFT 5
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_MASK 0x20

#endif /* GAUDI2_MASKS_H_ */
@@ -2559,6 +2559,10 @@ static const u32 gaudi2_pb_pcie[] = {
mmPCIE_WRAP_BASE,
};

static const u32 gaudi2_pb_pcie_unsecured_regs[] = {
mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0,
};

static const u32 gaudi2_pb_thermal_sensor0[] = {
mmDCORE0_XFT_BASE,
mmDCORE0_TSTDVS_BASE,
@@ -2583,9 +2587,9 @@ struct gaudi2_tpc_pb_data {
};

static void gaudi2_config_tpcs_glbl_sec(struct hl_device *hdev, int dcore, int inst, u32 offset,
void *data)
struct iterate_module_ctx *ctx)
{
struct gaudi2_tpc_pb_data *pb_data = (struct gaudi2_tpc_pb_data *)data;
struct gaudi2_tpc_pb_data *pb_data = ctx->data;

hl_config_glbl_sec(hdev, gaudi2_pb_dcr0_tpc0, pb_data->glbl_sec,
offset, pb_data->block_array_size);
@@ -2660,15 +2664,14 @@ static int gaudi2_init_pb_tpc(struct hl_device *hdev)
struct gaudi2_tpc_arc_pb_data {
u32 unsecured_regs_arr_size;
u32 arc_regs_arr_size;
int rc;
};

static void gaudi2_config_tpcs_pb_ranges(struct hl_device *hdev, int dcore, int inst, u32 offset,
void *data)
struct iterate_module_ctx *ctx)
{
struct gaudi2_tpc_arc_pb_data *pb_data = (struct gaudi2_tpc_arc_pb_data *)data;
struct gaudi2_tpc_arc_pb_data *pb_data = ctx->data;

pb_data->rc |= hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, 1,
ctx->rc = hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, 1,
offset, gaudi2_pb_dcr0_tpc0_arc,
pb_data->arc_regs_arr_size,
gaudi2_pb_dcr0_tpc0_arc_unsecured_regs,
@@ -2683,12 +2686,12 @@ static int gaudi2_init_pb_tpc_arc(struct hl_device *hdev)
tpc_arc_pb_data.arc_regs_arr_size = ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc);
tpc_arc_pb_data.unsecured_regs_arr_size =
ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc_unsecured_regs);
tpc_arc_pb_data.rc = 0;

tpc_iter.fn = &gaudi2_config_tpcs_pb_ranges;
tpc_iter.data = &tpc_arc_pb_data;
gaudi2_iterate_tpcs(hdev, &tpc_iter);

return tpc_arc_pb_data.rc;
return tpc_iter.rc;
}

static int gaudi2_init_pb_sm_objs(struct hl_device *hdev)
@@ -3419,7 +3422,8 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev)
rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,
HL_PB_SINGLE_INSTANCE, HL_PB_NA,
gaudi2_pb_pcie, ARRAY_SIZE(gaudi2_pb_pcie),
NULL, HL_PB_NA);
gaudi2_pb_pcie_unsecured_regs,
ARRAY_SIZE(gaudi2_pb_pcie_unsecured_regs));

/* Thermal Sensor.
* Skip when security is enabled in F/W, because the blocks are protected by privileged RR.
@@ -3547,9 +3551,9 @@ struct gaudi2_ack_pb_tpc_data {
};

static void gaudi2_ack_pb_tpc_config(struct hl_device *hdev, int dcore, int inst, u32 offset,
void *data)
struct iterate_module_ctx *ctx)
{
struct gaudi2_ack_pb_tpc_data *pb_data = (struct gaudi2_ack_pb_tpc_data *)data;
struct gaudi2_ack_pb_tpc_data *pb_data = ctx->data;

hl_ack_pb_single_dcore(hdev, offset, HL_PB_SINGLE_INSTANCE, HL_PB_NA,
gaudi2_pb_dcr0_tpc0, pb_data->tpc_regs_array_size);
@@ -916,26 +916,11 @@ int goya_late_init(struct hl_device *hdev)
*/
void goya_late_fini(struct hl_device *hdev)
{
const struct hwmon_channel_info **channel_info_arr;
struct goya_device *goya = hdev->asic_specific;
int i = 0;

cancel_delayed_work_sync(&goya->goya_work->work_freq);

if (!hdev->hl_chip_info->info)
return;

channel_info_arr = hdev->hl_chip_info->info;

while (channel_info_arr[i]) {
kfree(channel_info_arr[i]->config);
kfree(channel_info_arr[i]);
i++;
}

kfree(channel_info_arr);

hdev->hl_chip_info->info = NULL;
hl_hwmon_release_resources(hdev);
}

static void goya_set_pci_memory_regions(struct hl_device *hdev)
@@ -1040,6 +1025,7 @@ static int goya_sw_init(struct hl_device *hdev)
hdev->asic_prop.supports_compute_reset = true;
hdev->asic_prop.allow_inference_soft_reset = true;
hdev->supports_wait_for_multi_cs = false;
hdev->supports_ctx_switch = true;

hdev->asic_funcs->set_pci_memory_regions(hdev);

@@ -4559,7 +4545,7 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
return rc;
}

static int goya_non_hard_reset_late_init(struct hl_device *hdev)
static int goya_compute_reset_late_init(struct hl_device *hdev)
{
/*
* Unmask all IRQs since some could have been received
@@ -5137,8 +5123,8 @@ int goya_cpucp_info_get(struct hl_device *hdev)
return 0;
}

static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
u8 mask_len, struct seq_file *s)
static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
struct engines_data *e)
{
const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
@@ -5149,9 +5135,9 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
u64 offset;
int i;

if (s)
seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n"
"--- ------- ------------ -------------\n");
if (e)
hl_engine_data_sprintf(e, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n"
"--- ------- ------------ -------------\n");

offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;

@@ -5164,13 +5150,13 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,

if (mask && !is_eng_idle)
set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask);
if (s)
seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
if (e)
hl_engine_data_sprintf(e, dma_fmt, i, is_eng_idle ? "Y" : "N",
qm_glbl_sts0, dma_core_sts0);
}

if (s)
seq_puts(s,
if (e)
hl_engine_data_sprintf(e,
"\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n"
"--- ------- ------------ -------------- ----------\n");

@@ -5187,13 +5173,13 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,

if (mask && !is_eng_idle)
set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask);
if (s)
seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
if (e)
hl_engine_data_sprintf(e, fmt, i, is_eng_idle ? "Y" : "N",
qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
}

if (s)
seq_puts(s,
if (e)
hl_engine_data_sprintf(e,
"\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n"
"--- ------- ------------ -------------- -----------\n");

@@ -5207,10 +5193,10 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,

if (mask && !is_eng_idle)
set_bit(GOYA_ENGINE_ID_MME_0, mask);
if (s) {
seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
if (e) {
hl_engine_data_sprintf(e, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
cmdq_glbl_sts0, mme_arch_sts);
seq_puts(s, "\n");
hl_engine_data_sprintf(e, "\n");
}

return is_idle;
@@ -5434,6 +5420,11 @@ static int goya_scrub_device_dram(struct hl_device *hdev, u64 val)
return -EOPNOTSUPP;
}

static int goya_send_device_activity(struct hl_device *hdev, bool open)
{
return 0;
}

static const struct hl_asic_funcs goya_funcs = {
.early_init = goya_early_init,
.early_fini = goya_early_fini,
@@ -5478,11 +5469,9 @@ static const struct hl_asic_funcs goya_funcs = {
.send_heartbeat = goya_send_heartbeat,
.debug_coresight = goya_debug_coresight,
.is_device_idle = goya_is_device_idle,
.non_hard_reset_late_init = goya_non_hard_reset_late_init,
.compute_reset_late_init = goya_compute_reset_late_init,
.hw_queues_lock = goya_hw_queues_lock,
.hw_queues_unlock = goya_hw_queues_unlock,
.kdma_lock = NULL,
.kdma_unlock = NULL,
.get_pci_id = goya_get_pci_id,
.get_eeprom_data = goya_get_eeprom_data,
.get_monitor_dump = goya_get_monitor_dump,
@@ -5528,6 +5517,7 @@ static const struct hl_asic_funcs goya_funcs = {
.mmu_get_real_page_size = hl_mmu_get_real_page_size,
.access_dev_mem = hl_access_dev_mem,
.set_dram_bar_base = goya_set_ddr_bar_base,
.send_device_activity = goya_send_device_activity,
};

/*
@@ -629,6 +629,12 @@ enum pq_init_status {
* CPUCP_PACKET_ENGINE_CORE_ASID_SET -
* Packet to perform engine core ASID configuration
*
* CPUCP_PACKET_SEC_ATTEST_GET -
* Get the attestation data that is collected during various stages of the
* boot sequence. The attestation data is also hashed with some unique
* number (nonce) provided by the host to prevent replay attacks.
* A public key and certificate are also provided as part of the FW response.
*
* CPUCP_PACKET_MONITOR_DUMP_GET -
* Get monitors registers dump from the CpuCP kernel.
* The CPU will put the registers dump in a buffer allocated by the driver
@@ -636,6 +642,10 @@ enum pq_init_status {
* passes the max size it allows the CpuCP to write to the structure, to prevent
* data corruption in case of mismatched driver/FW versions.
* Relevant only to Gaudi.
*
* CPUCP_PACKET_ACTIVE_STATUS_SET -
* LKD sends FW indication whether device is free or in use, this indication is reported
* also to the BMC.
*/

enum cpucp_packet_id {
@@ -687,10 +697,17 @@ enum cpucp_packet_id {
CPUCP_PACKET_RESERVED, /* not used */
CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */
CPUCP_PACKET_RESERVED2, /* not used */
CPUCP_PACKET_SEC_ATTEST_GET, /* internal */
CPUCP_PACKET_RESERVED3, /* not used */
CPUCP_PACKET_RESERVED4, /* not used */
CPUCP_PACKET_RESERVED5, /* not used */
CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */
CPUCP_PACKET_RESERVED5, /* not used */
CPUCP_PACKET_RESERVED6, /* not used */
CPUCP_PACKET_RESERVED7, /* not used */
CPUCP_PACKET_RESERVED8, /* not used */
CPUCP_PACKET_RESERVED9, /* not used */
CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */
CPUCP_PACKET_ID_MAX /* must be last */
};

#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5
@@ -783,6 +800,9 @@ struct cpucp_packet {
* result cannot be used to hold general purpose data.
*/
__le32 status_mask;

/* random, used once number, for security packets */
__le32 nonce;
};

/* For NIC requests */
@@ -813,10 +833,25 @@ enum cpucp_led_index {
CPUCP_LED2_INDEX
};

/*
* enum cpucp_packet_rc - Error return code
* @cpucp_packet_success -> in case of success.
* @cpucp_packet_invalid -> this is to support Goya and Gaudi platform.
* @cpucp_packet_fault -> in case of processing error like failing to
* get device binding or semaphore etc.
* @cpucp_packet_invalid_pkt -> when cpucp packet is un-supported. This is
* supported Greco onwards.
* @cpucp_packet_invalid_params -> when checking parameter like length of buffer
* or attribute value etc. Supported Greco onwards.
* @cpucp_packet_rc_max -> It indicates size of enum so should be at last.
*/
enum cpucp_packet_rc {
cpucp_packet_success,
cpucp_packet_invalid,
cpucp_packet_fault
cpucp_packet_fault,
cpucp_packet_invalid_pkt,
cpucp_packet_invalid_params,
cpucp_packet_rc_max
};

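The extended enum means a driver-side consumer now has to distinguish "unsupported packet" from "bad parameters" instead of treating every non-success code as a generic fault. A minimal sketch of how such a mapping onto errno values could look; the helper name and the chosen errno values are illustrative assumptions, not the driver's actual implementation:

/* Illustrative only: map the extended cpucp return codes onto errno
 * values. Helper name and mapping are assumptions for this sketch.
 */
#include <linux/errno.h>
#include <linux/types.h>

static int cpucp_rc_to_errno(u32 rc)
{
	switch (rc) {
	case cpucp_packet_success:
		return 0;
	case cpucp_packet_invalid_pkt:
		return -EOPNOTSUPP;	/* unsupported packet, Greco onwards */
	case cpucp_packet_invalid_params:
		return -EINVAL;		/* bad buffer length/attribute value */
	case cpucp_packet_fault:
	default:
		return -EIO;		/* generic processing error */
	}
}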
/*
@@ -1193,6 +1228,70 @@ enum cpu_reset_status {
CPU_RST_STATUS_SOFT_RST_DONE = 1,
};

#define SEC_PCR_DATA_BUF_SZ 256
#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */
#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */

/*
* struct cpucp_sec_attest_info - attestation report of the boot
* @pcr_data: raw values of the PCR registers
* @pcr_num_reg: number of PCR registers in the pcr_data array
* @pcr_reg_len: length of each PCR register in the pcr_data array (bytes)
* @nonce: number only used once. random number provided by host. this also
* passed to the quote command as a qualifying data.
* @pcr_quote_len: length of the attestation quote data (bytes)
* @pcr_quote: attestation report data structure
* @quote_sig_len: length of the attestation report signature (bytes)
* @quote_sig: signature structure of the attestation report
* @pub_data_len: length of the public data (bytes)
* @public_data: public key for the signed attestation
* (outPublic + name + qualifiedName)
* @certificate_len: length of the certificate (bytes)
* @certificate: certificate for the attestation signing key
*/
struct cpucp_sec_attest_info {
__u8 pcr_data[SEC_PCR_DATA_BUF_SZ];
__u8 pcr_num_reg;
__u8 pcr_reg_len;
__le16 pad0;
__le32 nonce;
__le16 pcr_quote_len;
__u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ];
__u8 quote_sig_len;
__u8 quote_sig[SEC_SIGNATURE_BUF_SZ];
__le16 pub_data_len;
__u8 public_data[SEC_PUB_DATA_BUF_SZ];
__le16 certificate_len;
__u8 certificate[SEC_CERTIFICATE_BUF_SZ];
};

/*
* struct cpucp_dev_info_signed - device information signed by a secured device
* @info: device information structure as defined above
* @nonce: number only used once. random number provided by host. this number is
* hashed and signed along with the device information.
* @info_sig_len: length of the attestation signature (bytes)
* @info_sig: signature of the info + nonce data.
* @pub_data_len: length of the public data (bytes)
* @public_data: public key info signed info data
* (outPublic + name + qualifiedName)
* @certificate_len: length of the certificate (bytes)
* @certificate: certificate for the signing key
*/
struct cpucp_dev_info_signed {
struct cpucp_info info; /* assumed to be 64bit aligned */
__le32 nonce;
__le32 pad0;
__u8 info_sig_len;
__u8 info_sig[SEC_SIGNATURE_BUF_SZ];
__le16 pub_data_len;
__u8 public_data[SEC_PUB_DATA_BUF_SZ];
__le16 certificate_len;
__u8 certificate[SEC_CERTIFICATE_BUF_SZ];
};

/*
* struct dcore_monitor_regs_data - DCORE monitor regs data.
* the structure follows sync manager block layout. relevant only to Gaudi.
@@ -34,6 +34,7 @@ enum cpu_boot_err {
CPU_BOOT_ERR_BINNING_FAIL = 19,
CPU_BOOT_ERR_TPM_FAIL = 20,
CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21,
CPU_BOOT_ERR_EEPROM_FAIL = 22,
CPU_BOOT_ERR_ENABLED = 31,
CPU_BOOT_ERR_SCND_EN = 63,
CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
@@ -115,6 +116,9 @@ enum cpu_boot_err {
* CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for temperature
* sensor.
*
* CPU_BOOT_ERR_EEPROM_FAIL Failed reading EEPROM data. Defaults
* are used.
*
* CPU_BOOT_ERR0_ENABLED Error registers enabled.
* This is a main indication that the
* running FW populates the error
@@ -139,6 +143,7 @@ enum cpu_boot_err {
#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL)
#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL)
#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL)
#define CPU_BOOT_ERR0_EEPROM_FAIL (1 << CPU_BOOT_ERR_EEPROM_FAIL)
#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED)

@@ -426,7 +431,9 @@ struct cpu_dyn_regs {
__le32 gic_host_ints_irq;
__le32 gic_host_soft_rst_irq;
__le32 gic_rot_qm_irq_ctrl;
__le32 reserved1[22]; /* reserve for future use */
__le32 cpu_rst_status;
__le32 eng_arc_irq_ctrl;
__le32 reserved1[20]; /* reserve for future use */
};

/* TODO: remove the desc magic after the code is updated to use message */
@@ -465,6 +472,26 @@ enum comms_msg_type {
HL_COMMS_BINNING_CONF_TYPE = 3,
};

/*
* Binning information shared between LKD and FW
* @tpc_mask - TPC binning information
* @dec_mask - Decoder binning information
* @hbm_mask - HBM binning information
* @edma_mask - EDMA binning information
* @mme_mask_l - MME binning information lower 32
* @mme_mask_h - MME binning information upper 32
* @reserved - reserved field for 64 bit alignment
*/
struct lkd_fw_binning_info {
__le64 tpc_mask;
__le32 dec_mask;
__le32 hbm_mask;
__le32 edma_mask;
__le32 mme_mask_l;
__le32 mme_mask_h;
__le32 reserved;
};

/* TODO: remove this struct after the code is updated to use message */
/* this is the comms descriptor header - meta data */
struct comms_desc_header {
@@ -525,13 +552,7 @@ struct lkd_fw_comms_msg {
struct {
__u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */
};
struct {
__le64 tpc_binning_conf;
__le32 dec_binning_conf;
__le32 hbm_binning_conf;
__le32 edma_binning_conf;
__le32 mme_redundancy_conf; /* use MME_REDUNDANT_COLUMN */
};
struct lkd_fw_binning_info binning_info;
};
};
@@ -132,6 +132,7 @@
#include "dcore0_mme_ctrl_lo_arch_tensor_a_regs.h"
#include "dcore0_mme_ctrl_lo_arch_tensor_b_regs.h"
#include "dcore0_mme_ctrl_lo_arch_tensor_cout_regs.h"
#include "pcie_wrap_special_regs.h"

#include "pdma0_qm_masks.h"
#include "pdma0_core_masks.h"
@@ -239,6 +240,7 @@
#define SFT_IF_RTR_OFFSET (mmSFT0_HBW_RTR_IF1_RTR_H3_BASE - mmSFT0_HBW_RTR_IF0_RTR_H3_BASE)

#define ARC_HALT_REQ_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_REQ - mmARC_FARM_ARC0_AUX_BASE)
#define ARC_HALT_ACK_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_ACK - mmARC_FARM_ARC0_AUX_BASE)

#define ARC_REGION_CFG_OFFSET(region) \
(mmARC_FARM_ARC0_AUX_ARC_REGION_CFG_0 + (region * 4) - mmARC_FARM_ARC0_AUX_BASE)
@@ -0,0 +1,185 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2020 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/

/************************************
** This is an auto-generated file **
** DO NOT EDIT BELOW **
************************************/

#ifndef ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_
#define ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_

/*
*****************************************
* PCIE_WRAP_SPECIAL
* (Prototype: SPECIAL_REGS)
*****************************************
*/

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_0 0x4C01E80

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_1 0x4C01E84

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_2 0x4C01E88

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_3 0x4C01E8C

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_4 0x4C01E90

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_5 0x4C01E94

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_6 0x4C01E98

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_7 0x4C01E9C

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_8 0x4C01EA0

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_9 0x4C01EA4

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_10 0x4C01EA8

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_11 0x4C01EAC

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_12 0x4C01EB0

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_13 0x4C01EB4

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_14 0x4C01EB8

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_15 0x4C01EBC

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_16 0x4C01EC0

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_17 0x4C01EC4

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_18 0x4C01EC8

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_19 0x4C01ECC

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_20 0x4C01ED0

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_21 0x4C01ED4

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_22 0x4C01ED8

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_23 0x4C01EDC

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_24 0x4C01EE0

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_25 0x4C01EE4

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_26 0x4C01EE8

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_27 0x4C01EEC

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_28 0x4C01EF0

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_29 0x4C01EF4

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_30 0x4C01EF8

#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_31 0x4C01EFC

#define mmPCIE_WRAP_SPECIAL_MEM_GW_DATA 0x4C01F00

#define mmPCIE_WRAP_SPECIAL_MEM_GW_REQ 0x4C01F04

#define mmPCIE_WRAP_SPECIAL_MEM_NUMOF 0x4C01F0C

#define mmPCIE_WRAP_SPECIAL_MEM_ECC_SEL 0x4C01F10

#define mmPCIE_WRAP_SPECIAL_MEM_ECC_CTL 0x4C01F14

#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_MASK 0x4C01F18

#define mmPCIE_WRAP_SPECIAL_MEM_ECC_GLBL_ERR_MASK 0x4C01F1C

#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_STS 0x4C01F20

#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_ADDR 0x4C01F24

#define mmPCIE_WRAP_SPECIAL_MEM_RM 0x4C01F28

#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_MASK 0x4C01F40

#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_ADDR 0x4C01F44

#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_CAUSE 0x4C01F48

#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0 0x4C01F60

#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_1 0x4C01F64

#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_2 0x4C01F68

#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_3 0x4C01F6C

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_0 0x4C01F80

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_1 0x4C01F84

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_2 0x4C01F88

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_3 0x4C01F8C

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_4 0x4C01F90

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_5 0x4C01F94

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_6 0x4C01F98

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_7 0x4C01F9C

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_8 0x4C01FA0

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_9 0x4C01FA4

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_10 0x4C01FA8

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_11 0x4C01FAC

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_12 0x4C01FB0

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_13 0x4C01FB4

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_14 0x4C01FB8

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_15 0x4C01FBC

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_16 0x4C01FC0

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_17 0x4C01FC4

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_18 0x4C01FC8

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_19 0x4C01FCC

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_20 0x4C01FD0

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_21 0x4C01FD4

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_22 0x4C01FD8

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_23 0x4C01FDC

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_24 0x4C01FE0

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_25 0x4C01FE4

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_26 0x4C01FE8

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_27 0x4C01FEC

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_28 0x4C01FF0

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_29 0x4C01FF4

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_30 0x4C01FF8

#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_31 0x4C01FFC

#endif /* ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_ */
@@ -1,57 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/

#ifndef __GAUDI2_ASYNC_VIRT_EVENTS_H_
#define __GAUDI2_ASYNC_VIRT_EVENTS_H_

enum gaudi2_async_virt_event_id {
GAUDI2_EVENT_NIC3_QM1_OLD = 1206,
GAUDI2_EVENT_NIC4_QM0_OLD = 1207,
GAUDI2_EVENT_NIC4_QM1_OLD = 1208,
GAUDI2_EVENT_NIC5_QM0_OLD = 1209,
GAUDI2_EVENT_NIC5_QM1_OLD = 1210,
GAUDI2_EVENT_NIC6_QM0_OLD = 1211,
GAUDI2_EVENT_NIC6_QM1_OLD = 1212,
GAUDI2_EVENT_NIC7_QM0_OLD = 1213,
GAUDI2_EVENT_NIC7_QM1_OLD = 1214,
GAUDI2_EVENT_NIC8_QM0_OLD = 1215,
GAUDI2_EVENT_NIC8_QM1_OLD = 1216,
GAUDI2_EVENT_NIC9_QM0_OLD = 1217,
GAUDI2_EVENT_NIC9_QM1_OLD = 1218,
GAUDI2_EVENT_NIC10_QM0_OLD = 1219,
GAUDI2_EVENT_NIC10_QM1_OLD = 1220,
GAUDI2_EVENT_NIC11_QM0_OLD = 1221,
GAUDI2_EVENT_NIC11_QM1_OLD = 1222,
GAUDI2_EVENT_CPU_PKT_SANITY_FAILED_OLD = 1223,
GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0_OLD = 1224,
GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG1_OLD = 1225,
GAUDI2_EVENT_CPU1_STATUS_NIC1_ENG0_OLD = 1226,
GAUDI2_EVENT_CPU1_STATUS_NIC1_ENG1_OLD = 1227,
GAUDI2_EVENT_CPU2_STATUS_NIC2_ENG0_OLD = 1228,
GAUDI2_EVENT_CPU2_STATUS_NIC2_ENG1_OLD = 1229,
GAUDI2_EVENT_CPU3_STATUS_NIC3_ENG0_OLD = 1230,
GAUDI2_EVENT_CPU3_STATUS_NIC3_ENG1_OLD = 1231,
GAUDI2_EVENT_CPU4_STATUS_NIC4_ENG0_OLD = 1232,
GAUDI2_EVENT_CPU4_STATUS_NIC4_ENG1_OLD = 1233,
GAUDI2_EVENT_CPU5_STATUS_NIC5_ENG0_OLD = 1234,
GAUDI2_EVENT_CPU5_STATUS_NIC5_ENG1_OLD = 1235,
GAUDI2_EVENT_CPU6_STATUS_NIC6_ENG0_OLD = 1236,
GAUDI2_EVENT_CPU6_STATUS_NIC6_ENG1_OLD = 1237,
GAUDI2_EVENT_CPU7_STATUS_NIC7_ENG0_OLD = 1238,
GAUDI2_EVENT_CPU7_STATUS_NIC7_ENG1_OLD = 1239,
GAUDI2_EVENT_CPU8_STATUS_NIC8_ENG0_OLD = 1240,
GAUDI2_EVENT_CPU8_STATUS_NIC8_ENG1_OLD = 1241,
GAUDI2_EVENT_CPU9_STATUS_NIC9_ENG0_OLD = 1242,
GAUDI2_EVENT_CPU9_STATUS_NIC9_ENG1_OLD = 1243,
GAUDI2_EVENT_CPU10_STATUS_NIC10_ENG0_OLD = 1244,
GAUDI2_EVENT_CPU10_STATUS_NIC10_ENG1_OLD = 1245,
GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG0_OLD = 1246,
GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1_OLD = 1247,
GAUDI2_EVENT_ARC_DCCM_FULL_OLD = 1248,
};

#endif /* __GAUDI2_ASYNC_VIRT_EVENTS_H_ */

include/trace/events/habanalabs.h (new file, 93 lines)
@@ -0,0 +1,93 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/

#undef TRACE_SYSTEM
#define TRACE_SYSTEM habanalabs

#if !defined(_TRACE_HABANALABS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_HABANALABS_H

#include <linux/tracepoint.h>

DECLARE_EVENT_CLASS(habanalabs_mmu_template,
TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte),

TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte),

TP_STRUCT__entry(
__string(dname, dev_name(dev))
__field(u64, virt_addr)
__field(u64, phys_addr)
__field(u32, page_size)
__field(u8, flush_pte)
),

TP_fast_assign(
__assign_str(dname, dev_name(dev));
__entry->virt_addr = virt_addr;
__entry->phys_addr = phys_addr;
__entry->page_size = page_size;
__entry->flush_pte = flush_pte;
),

TP_printk("%s: vaddr: %#llx, paddr: %#llx, psize: %#x, flush: %s",
__get_str(dname),
__entry->virt_addr,
__entry->phys_addr,
__entry->page_size,
__entry->flush_pte ? "true" : "false")
);

DEFINE_EVENT(habanalabs_mmu_template, habanalabs_mmu_map,
TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte),
TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte));

DEFINE_EVENT(habanalabs_mmu_template, habanalabs_mmu_unmap,
TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte),
TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte));

DECLARE_EVENT_CLASS(habanalabs_dma_alloc_template,
TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller),

TP_ARGS(dev, cpu_addr, dma_addr, size, caller),

TP_STRUCT__entry(
__string(dname, dev_name(dev))
__field(u64, cpu_addr)
__field(u64, dma_addr)
__field(u32, size)
__field(const char *, caller)
),

TP_fast_assign(
__assign_str(dname, dev_name(dev));
__entry->cpu_addr = cpu_addr;
__entry->dma_addr = dma_addr;
__entry->size = size;
__entry->caller = caller;
),

TP_printk("%s: cpu_addr: %#llx, dma_addr: %#llx, size: %#x, caller: %s",
__get_str(dname),
__entry->cpu_addr,
__entry->dma_addr,
__entry->size,
__entry->caller)
);

DEFINE_EVENT(habanalabs_dma_alloc_template, habanalabs_dma_alloc,
TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller),
TP_ARGS(dev, cpu_addr, dma_addr, size, caller));

DEFINE_EVENT(habanalabs_dma_alloc_template, habanalabs_dma_free,
TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller),
TP_ARGS(dev, cpu_addr, dma_addr, size, caller));

#endif /* if !defined(_TRACE_HABANALABS_H) || defined(TRACE_HEADER_MULTI_READ) */

/* This part must be outside protection */
#include <trace/define_trace.h>
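These two event classes back the DMA-allocation and MMU map/unmap trace events mentioned in the merge description. As a rough sketch of how a driver-side call site uses them — the wrapper function is hypothetical and the struct hl_device layout is assumed; the real call sites live in the driver's MMU and DMA code:

/* Sketch of a kernel-side call site for the new tracepoints. Exactly
 * one compilation unit in the driver defines CREATE_TRACE_POINTS
 * before including the header; everyone else just includes it.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/habanalabs.h>

static void example_trace_map(struct hl_device *hdev, u64 virt_addr,
			      u64 phys_addr, u32 page_size, bool flush_pte)
{
	/* ...the actual page-table update happens here... */

	if (trace_habanalabs_mmu_map_enabled())
		trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr,
					 page_size, flush_pte);
}

Once compiled in, the events appear under /sys/kernel/tracing/events/habanalabs/ and can be toggled like any other tracepoint.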
@@ -707,6 +707,25 @@ enum hl_server_type {
HL_SERVER_GAUDI2_HLS2 = 5
};

/*
* Notifier event values - for the notification mechanism and the HL_INFO_GET_EVENTS command
*
* HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event
* HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code
* HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset
* HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error
* HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE - Indicates device is unavailable
* HL_NOTIFIER_EVENT_USER_ENGINE_ERR - Indicates device engine in error state
* HL_NOTIFIER_EVENT_GENERAL_HW_ERR - Indicates device HW error
*/
#define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0)
#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1)
#define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2)
#define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3)
#define HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE (1ULL << 4)
#define HL_NOTIFIER_EVENT_USER_ENGINE_ERR (1ULL << 5)
#define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6)

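User space consumes these bits through the INFO ioctl: register an eventfd, wait for it to fire, then query which events were raised. A minimal user-space sketch under those assumptions — HL_IOCTL_INFO and the hl_info_args field names come from the existing habanalabs uAPI header, and error handling is trimmed:

/* Minimal sketch: register an eventfd for device-state notifications.
 * Assumes dev_fd is an open /dev/hl<n> descriptor.
 */
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>

int register_notifier(int dev_fd)
{
	struct hl_info_args args;
	int efd = eventfd(0, 0);

	if (efd < 0)
		return -1;

	memset(&args, 0, sizeof(args));
	args.op = HL_INFO_REGISTER_EVENTFD;
	args.eventfd = efd;
	if (ioctl(dev_fd, HL_IOCTL_INFO, &args) < 0)
		return -1;

	/* After efd fires, an HL_INFO_GET_EVENTS query returns the mask
	 * of HL_NOTIFIER_EVENT_* bits that were raised.
	 */
	return efd;
}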
/* Opcode for management ioctl
*
* HW_IP_INFO - Receive information about different IP blocks in the
@@ -754,6 +773,7 @@ enum hl_server_type {
* Razwi initiator.
* Razwi cause, was it a page fault or MMU access error.
* HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES - Retrieve valid page sizes for device memory allocation
* HL_INFO_SECURED_ATTESTATION - Retrieve attestation report of the boot.
* HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications.
* HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd
* HL_INFO_GET_EVENTS - Retrieve the last occurred events
@@ -783,14 +803,19 @@ enum hl_server_type {
#define HL_INFO_CS_TIMEOUT_EVENT 24
#define HL_INFO_RAZWI_EVENT 25
#define HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES 26
#define HL_INFO_SECURED_ATTESTATION 27
#define HL_INFO_REGISTER_EVENTFD 28
#define HL_INFO_UNREGISTER_EVENTFD 29
#define HL_INFO_GET_EVENTS 30
#define HL_INFO_UNDEFINED_OPCODE_EVENT 31
#define HL_INFO_ENGINE_STATUS 32

#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16

/* Maximum buffer size for retrieving engines status */
#define HL_ENGINES_DATA_MAX_SIZE SZ_1M

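The new HL_INFO_ENGINE_STATUS opcode returns a textual engines dump — the same data the reworked is_device_idle callbacks now write through struct engines_data — into a user buffer of at most HL_ENGINES_DATA_MAX_SIZE. A hedged sketch of the query; field names follow the hl_info_args layout shown further below, and error handling is trimmed:

/* Minimal sketch: fetch the engines-status text dump. The driver
 * reports the number of bytes it wrote back through
 * user_buffer_actual_size.
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>

int dump_engine_status(int dev_fd)
{
	struct hl_info_args args;
	void *buf = malloc(HL_ENGINES_DATA_MAX_SIZE);

	if (!buf)
		return -1;

	memset(&args, 0, sizeof(args));
	args.op = HL_INFO_ENGINE_STATUS;
	args.return_pointer = (uint64_t)(uintptr_t)buf;
	args.return_size = HL_ENGINES_DATA_MAX_SIZE;
	if (ioctl(dev_fd, HL_IOCTL_INFO, &args) < 0) {
		free(buf);
		return -1;
	}

	/* args.user_buffer_actual_size bytes of text are now in buf */
	free(buf);
	return 0;
}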
/**
* struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC
* @sram_base_address: The first SRAM physical base address that is free to be
@@ -821,6 +846,7 @@ enum hl_server_type {
* @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant
* for Goya/Gaudi only.
* @dram_enabled: Whether the DRAM is enabled.
* @security_enabled: Whether security is enabled on device.
* @mme_master_slave_mode: Indicate whether the MME is working in master/slave
* configuration. Relevant for Greco and later.
* @cpucp_version: The CPUCP f/w version.
@@ -852,7 +878,7 @@ struct hl_info_hw_ip_info {
__u32 psoc_pci_pll_div_factor;
__u8 tpc_enabled_mask;
__u8 dram_enabled;
__u8 reserved;
__u8 security_enabled;
__u8 mme_master_slave_mode;
__u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
@@ -876,13 +902,13 @@ struct hl_info_hw_idle {
__u32 is_idle;
/*
* Bitmask of busy engines.
* Bits definition is according to `enum <chip>_enging_id'.
* Bits definition is according to `enum <chip>_engine_id'.
*/
__u32 busy_engines_mask;

/*
* Extended Bitmask of busy engines.
* Bits definition is according to `enum <chip>_enging_id'.
* Bits definition is according to `enum <chip>_engine_id'.
*/
__u64 busy_engines_mask_ext[HL_BUSY_ENGINES_MASK_EXT_SIZE];
};
@@ -1078,12 +1104,12 @@ struct hl_info_razwi_event {
* struct hl_info_undefined_opcode_event - info about last undefined opcode error
* @timestamp: timestamp of the undefined opcode error
* @cb_addr_streams: CB addresses (per stream) that are currently exists in the PQ
* entiers. In case all streams array entries are
* entries. In case all streams array entries are
* filled with values, it means the execution was in Lower-CP.
* @cq_addr: the address of the current handled command buffer
* @cq_size: the size of the current handled command buffer
* @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array.
* should be equal to 1 incase of undefined opcode
* should be equal to 1 in case of undefined opcode
* in Upper-CP (specific stream) and equal to 4 incase
* of undefined opcode in Lower-CP.
* @engine_id: engine-id that the error occurred on
@@ -1109,6 +1135,45 @@ struct hl_info_dev_memalloc_page_sizes {
__u64 page_order_bitmask;
};

#define SEC_PCR_DATA_BUF_SZ 256
#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */
#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */

/*
* struct hl_info_sec_attest - attestation report of the boot
* @nonce: number only used once. random number provided by host. this also passed to the quote
* command as a qualifying data.
* @pcr_quote_len: length of the attestation quote data (bytes)
* @pub_data_len: length of the public data (bytes)
* @certificate_len: length of the certificate (bytes)
* @pcr_num_reg: number of PCR registers in the pcr_data array
* @pcr_reg_len: length of each PCR register in the pcr_data array (bytes)
* @quote_sig_len: length of the attestation report signature (bytes)
* @pcr_data: raw values of the PCR registers
* @pcr_quote: attestation report data structure
* @quote_sig: signature structure of the attestation report
* @public_data: public key for the signed attestation
* (outPublic + name + qualifiedName)
* @certificate: certificate for the attestation signing key
*/
struct hl_info_sec_attest {
__u32 nonce;
__u16 pcr_quote_len;
__u16 pub_data_len;
__u16 certificate_len;
__u8 pcr_num_reg;
__u8 pcr_reg_len;
__u8 quote_sig_len;
__u8 pcr_data[SEC_PCR_DATA_BUF_SZ];
__u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ];
__u8 quote_sig[SEC_SIGNATURE_BUF_SZ];
__u8 public_data[SEC_PUB_DATA_BUF_SZ];
__u8 certificate[SEC_CERTIFICATE_BUF_SZ];
__u8 pad0[2];
};

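Retrieving the report is a single INFO ioctl: the caller supplies a fresh nonce through the sec_attest_nonce field (added to struct hl_info_args just below) and a buffer for the signed report. A minimal user-space sketch under those assumptions; the nonce should come from a proper RNG and error handling is trimmed:

/* Minimal sketch: fetch the Gaudi2 secured attestation report. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>

int get_attestation(int dev_fd, uint32_t nonce,
		    struct hl_info_sec_attest *report)
{
	struct hl_info_args args;

	memset(&args, 0, sizeof(args));
	args.op = HL_INFO_SECURED_ATTESTATION;
	args.return_pointer = (uint64_t)(uintptr_t)report;
	args.return_size = sizeof(*report);
	args.sec_attest_nonce = nonce;

	/* On success the quote in report->pcr_quote can be verified
	 * against report->public_data and report->certificate.
	 */
	return ioctl(dev_fd, HL_IOCTL_INFO, &args);
}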
enum gaudi_dcores {
HL_GAUDI_WS_DCORE,
HL_GAUDI_WN_DCORE,
@@ -1130,6 +1195,11 @@ enum gaudi_dcores {
* resolution. Currently not in use.
* @pll_index: Index as defined in hl_<asic type>_pll_index enumeration.
* @eventfd: event file descriptor for event notifications.
* @user_buffer_actual_size: Actual data size which was copied to user allocated buffer by the
* driver. It is possible for the user to allocate buffer larger than
* needed, hence updating this variable so user will know the exact amount
* of bytes copied by the kernel to the buffer.
* @sec_attest_nonce: Nonce number used for attestation report.
* @pad: Padding to 64 bit.
*/
struct hl_info_args {
@@ -1143,6 +1213,8 @@ struct hl_info_args {
__u32 period_ms;
__u32 pll_index;
__u32 eventfd;
__u32 user_buffer_actual_size;
__u32 sec_attest_nonce;
};

__u32 pad;
@@ -1337,17 +1409,47 @@ struct hl_cs_chunk {
#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000
#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000

/*
* The engine cores CS is merged into the existing CS ioctls.
* Use it to control the engine cores mode.
*/
#define HL_CS_FLAGS_ENGINE_CORE_COMMAND 0x4000

#define HL_CS_STATUS_SUCCESS 0

#define HL_MAX_JOBS_PER_CS 512

/* HL_ENGINE_CORE_ values
*
* HL_ENGINE_CORE_HALT: engine core halt
* HL_ENGINE_CORE_RUN: engine core run
*/
#define HL_ENGINE_CORE_HALT (1 << 0)
#define HL_ENGINE_CORE_RUN (1 << 1)

struct hl_cs_in {

/* this holds address of array of hl_cs_chunk for restore phase */
__u64 chunks_restore;
union {
struct {
/* this holds address of array of hl_cs_chunk for restore phase */
__u64 chunks_restore;

/* holds address of array of hl_cs_chunk for execution phase */
__u64 chunks_execute;
/* holds address of array of hl_cs_chunk for execution phase */
__u64 chunks_execute;
};

/* Valid only when HL_CS_FLAGS_ENGINE_CORE_COMMAND is set */
struct {
/* this holds address of array of uint32 for engine_cores */
__u64 engine_cores;

/* number of engine cores in engine_cores array */
__u32 num_engine_cores;

/* the core command to be sent towards engine cores */
__u32 core_command;
};
};

union {
/*
@@ -1412,7 +1514,7 @@ struct hl_cs_out {

/* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
struct {
/* This is the resereved signal handle id */
/* This is the reserved signal handle id */
__u32 handle_id;

/* This is the signals count */
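With HL_CS_FLAGS_ENGINE_CORE_COMMAND set, the CS ioctl carries an array of core IDs plus a command instead of chunk addresses, which is how user space drives the new engine-core run/halt uAPI. A hedged user-space sketch of halting a set of cores — union hl_cs_args, the cs_flags field, and HL_IOCTL_CS are taken from the existing uAPI; core IDs are ASIC-specific and error handling is trimmed:

/* Minimal sketch: send a HALT command to a set of engine cores. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>

int halt_engine_cores(int dev_fd, uint32_t *core_ids, uint32_t num_cores)
{
	union hl_cs_args cs;

	memset(&cs, 0, sizeof(cs));
	cs.in.engine_cores = (uint64_t)(uintptr_t)core_ids;
	cs.in.num_engine_cores = num_cores;
	cs.in.core_command = HL_ENGINE_CORE_HALT;
	cs.in.cs_flags = HL_CS_FLAGS_ENGINE_CORE_COMMAND;

	return ioctl(dev_fd, HL_IOCTL_CS, &cs);
}

Sending HL_ENGINE_CORE_RUN through the same path resumes the cores.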
@@ -1874,21 +1976,6 @@ struct hl_debug_args {
__u32 ctx_id;
};

/*
* Notifier event values - for the notification mechanism and the HL_INFO_GET_EVENTS command
*
* HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event
* HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code
* HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset
* HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error
* HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE - Indicates device is unavailable
*/
#define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0)
#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1)
#define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2)
#define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3)
#define HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE (1ULL << 4)

/*
* Various information operations such as:
* - H/W IP information