habanalabs: support non power-of-2 DRAM phys page sizes

DRAM physical page sizes depend on the number of HBMs available in
the device. This number is device-dependent and may also be subject
to binning when one or more of the DRAM controllers are found to
be faulty. Such a configuration may lead to partitioning the DRAM
into non-power-of-2 pages.

To support this feature we also need to add infrastructure for address
scrambling.

Signed-off-by: Moti Haimovski <mhaimovski@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Moti Haimovski 2020-11-18 20:15:29 +02:00 committed by Oded Gabbay
parent a1f8533269
commit b19dc67aa8
6 changed files with 213 additions and 44 deletions

View File

@ -333,8 +333,10 @@ static int mmu_show(struct seq_file *s, void *data)
return 0; return 0;
} }
seq_printf(s, "asid: %u, virt_addr: 0x%llx\n", seq_printf(s,
dev_entry->mmu_asid, dev_entry->mmu_addr); "asid: %u, virt_addr: 0x%llx, scrambled virt_addr: 0x%llx\n",
dev_entry->mmu_asid, dev_entry->mmu_addr,
hops_info.scrambled_vaddr);
for (i = 0 ; i < hops_info.used_hops ; i++) { for (i = 0 ; i < hops_info.used_hops ; i++) {
seq_printf(s, "hop%d_addr: 0x%llx\n", seq_printf(s, "hop%d_addr: 0x%llx\n",

View File

@ -848,6 +848,8 @@ enum div_select_defs {
* @collective_wait_init_cs: Generate collective master/slave packets * @collective_wait_init_cs: Generate collective master/slave packets
* and place them in the relevant cs jobs * and place them in the relevant cs jobs
* @collective_wait_create_jobs: allocate collective wait cs jobs * @collective_wait_create_jobs: allocate collective wait cs jobs
* @scramble_vaddr: Routine to scramble the virtual address prior to mapping it
* in the MMU.
*/ */
struct hl_asic_funcs { struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev); int (*early_init)(struct hl_device *hdev);
@ -957,6 +959,7 @@ struct hl_asic_funcs {
int (*collective_wait_create_jobs)(struct hl_device *hdev, int (*collective_wait_create_jobs)(struct hl_device *hdev,
struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id, struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
u32 collective_engine_id); u32 collective_engine_id);
u64 (*scramble_vaddr)(struct hl_device *hdev, u64 virt_addr);
}; };
@ -1690,10 +1693,14 @@ struct hl_mmu_per_hop_info {
* struct hl_mmu_hop_info - A structure describing the TLB hops and their * struct hl_mmu_hop_info - A structure describing the TLB hops and their
* hop-entries that were created in order to translate a virtual address to a * hop-entries that were created in order to translate a virtual address to a
* physical one. * physical one.
* @scrambled_vaddr: The value of the virtual address after scrambling. This
* address replaces the original virtual-address when mapped
* in the MMU tables.
* @hop_info: Array holding the per-hop information used for the translation. * @hop_info: Array holding the per-hop information used for the translation.
* @used_hops: The number of hops used for the translation. * @used_hops: The number of hops used for the translation.
*/ */
struct hl_mmu_hop_info { struct hl_mmu_hop_info {
u64 scrambled_vaddr;
struct hl_mmu_per_hop_info hop_info[MMU_ARCH_5_HOPS]; struct hl_mmu_per_hop_info hop_info[MMU_ARCH_5_HOPS];
u32 used_hops; u32 used_hops;
}; };
@ -2184,6 +2191,7 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr); int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
struct hl_mmu_hop_info *hops); struct hl_mmu_hop_info *hops);
u64 hl_mmu_scramble_vaddr(struct hl_device *hdev, u64 virt_addr);
bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr); bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr);
int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,

View File

@ -14,6 +14,9 @@
#define HL_MMU_DEBUG 0 #define HL_MMU_DEBUG 0
/* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
#define DRAM_POOL_PAGE_SIZE SZ_8M
/* /*
* The va ranges in context object contain a list with the available chunks of * The va ranges in context object contain a list with the available chunks of
* device virtual memory. * device virtual memory.
@ -54,15 +57,14 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
struct hl_vm *vm = &hdev->vm; struct hl_vm *vm = &hdev->vm;
struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_vm_phys_pg_pack *phys_pg_pack;
u64 paddr = 0, total_size, num_pgs, i; u64 paddr = 0, total_size, num_pgs, i;
u32 num_curr_pgs, page_size, page_shift; u32 num_curr_pgs, page_size;
int handle, rc; int handle, rc;
bool contiguous; bool contiguous;
num_curr_pgs = 0; num_curr_pgs = 0;
page_size = hdev->asic_prop.dram_page_size; page_size = hdev->asic_prop.dram_page_size;
page_shift = __ffs(page_size); num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size);
num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift; total_size = num_pgs * page_size;
total_size = num_pgs << page_shift;
if (!total_size) { if (!total_size) {
dev_err(hdev->dev, "Cannot allocate 0 bytes\n"); dev_err(hdev->dev, "Cannot allocate 0 bytes\n");
@ -518,7 +520,8 @@ static inline int add_va_block(struct hl_device *hdev,
} }
/** /**
* get_va_block() - get a virtual block for the given size and alignment. * get_va_block_pow2() - get a virtual block for the given size and alignment
* where alignment is a power of 2.
* @hdev: pointer to the habanalabs device structure. * @hdev: pointer to the habanalabs device structure.
* @va_range: pointer to the virtual addresses range. * @va_range: pointer to the virtual addresses range.
* @size: requested block size. * @size: requested block size.
@ -531,12 +534,13 @@ static inline int add_va_block(struct hl_device *hdev,
* - Reserve the requested block and update the list. * - Reserve the requested block and update the list.
* - Return the start address of the virtual block. * - Return the start address of the virtual block.
*/ */
static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range, static u64 get_va_block_pow2(struct hl_device *hdev,
u64 size, u64 hint_addr, u32 va_block_align) struct hl_va_range *va_range,
u64 size, u64 hint_addr, u32 va_block_align)
{ {
struct hl_vm_va_block *va_block, *new_va_block = NULL; struct hl_vm_va_block *va_block, *new_va_block = NULL;
u64 valid_start, valid_size, prev_start, prev_end, align_mask, u64 valid_start, valid_size, prev_start, prev_end, align_mask,
res_valid_start = 0, res_valid_size = 0; reserved_valid_start = 0, reserved_valid_size = 0;
bool add_prev = false; bool add_prev = false;
align_mask = ~((u64)va_block_align - 1); align_mask = ~((u64)va_block_align - 1);
@ -562,34 +566,34 @@ static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
valid_size = va_block->end - valid_start; valid_size = va_block->end - valid_start;
if (valid_size >= size && if (valid_size >= size && (!new_va_block ||
(!new_va_block || valid_size < res_valid_size)) { valid_size < reserved_valid_size)) {
new_va_block = va_block; new_va_block = va_block;
res_valid_start = valid_start; reserved_valid_start = valid_start;
res_valid_size = valid_size; reserved_valid_size = valid_size;
} }
if (hint_addr && hint_addr >= valid_start && if (hint_addr && hint_addr >= valid_start &&
((hint_addr + size) <= va_block->end)) { (hint_addr + size) <= va_block->end) {
new_va_block = va_block; new_va_block = va_block;
res_valid_start = hint_addr; reserved_valid_start = hint_addr;
res_valid_size = valid_size; reserved_valid_size = valid_size;
break; break;
} }
} }
if (!new_va_block) { if (!new_va_block) {
dev_err(hdev->dev, "no available va block for size %llu\n", dev_err(hdev->dev, "no available va block for size %llu\n",
size); size);
goto out; goto out;
} }
if (res_valid_start > new_va_block->start) { if (reserved_valid_start > new_va_block->start) {
prev_start = new_va_block->start; prev_start = new_va_block->start;
prev_end = res_valid_start - 1; prev_end = reserved_valid_start - 1;
new_va_block->start = res_valid_start; new_va_block->start = reserved_valid_start;
new_va_block->size = res_valid_size; new_va_block->size = reserved_valid_size;
add_prev = true; add_prev = true;
} }
@ -610,10 +614,98 @@ static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
out: out:
mutex_unlock(&va_range->lock); mutex_unlock(&va_range->lock);
return res_valid_start; return reserved_valid_start;
} }
/** /**
* get_va_block_non_pow2() - get a virtual block for the given size and
* alignment where alignment is not a power of 2.
* @hdev: pointer to the habanalabs device structure.
* @va_range: pointer to the virtual addresses range.
* @size: requested block size.
* @hint_addr: hint for requested address by the user.
* @va_block_align: required alignment of the virtual block start address.
*
* This function does the following:
* - Iterate on the virtual block list to find a suitable virtual block for the
* given size and alignment.
* - Reserve the requested block and update the list.
* - Return the start address of the virtual block.
*/
static u64 get_va_block_non_pow2(struct hl_device *hdev,
				struct hl_va_range *va_range,
				u64 size, u64 hint_addr, u32 va_block_align)
{
	struct hl_vm_va_block *cur, *chosen = NULL;
	u64 num_pages, block_start = 0;

	/*
	 * Non-power-of-2 ranges are handled in whole pages only: round the
	 * request up to a multiple of the range page size. Block start
	 * addresses are expected to be page aligned already, so neither
	 * hint_addr nor va_block_align is consulted here.
	 */
	num_pages = DIV_ROUND_UP_ULL(size, va_range->page_size);
	size = num_pages * va_range->page_size;

	mutex_lock(&va_range->lock);

	print_va_list_locked(hdev, &va_range->list);

	/* first fit: take the first block that can hold the rounded size */
	list_for_each_entry(cur, &va_range->list, node) {
		if ((cur->start + size) <= cur->end) {
			chosen = cur;
			block_start = cur->start;
			break;
		}
	}

	if (!chosen) {
		dev_err(hdev->dev, "no available va block for size %llu\n",
								size);
		goto out;
	}

	if (chosen->size <= size) {
		/* nothing is left over - drop the block from the list */
		list_del(&chosen->node);
		kfree(chosen);
	} else {
		/* carve the reservation off the head of the block */
		chosen->start += size;
		chosen->size = chosen->end - chosen->start;
	}

	print_va_list_locked(hdev, &va_range->list);

out:
	mutex_unlock(&va_range->lock);

	/* stays 0 when no suitable block was found */
	return block_start;
}
/**
 * get_va_block() - get a virtual block for the given size and alignment.
 * @hdev: pointer to the habanalabs device structure.
 * @va_range: pointer to the virtual addresses range.
 * @size: requested block size.
 * @hint_addr: hint for requested address by the user.
 * @va_block_align: required alignment of the virtual block start address.
 *
 * Thin dispatcher: the page size of @va_range decides which allocator does
 * the work. A power-of-2 page size goes to get_va_block_pow2(), any other
 * page size goes to get_va_block_non_pow2(). All arguments are forwarded
 * unchanged.
 *
 * Return: the start address reported by the selected allocator.
 */
static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
			u64 size, u64 hint_addr, u32 va_block_align)
{
	if (!is_power_of_2(va_range->page_size))
		return get_va_block_non_pow2(hdev, va_range, size, hint_addr,
						va_block_align);

	return get_va_block_pow2(hdev, va_range, size, hint_addr,
					va_block_align);
}
/*
* hl_reserve_va_block() - reserve a virtual block of a given size. * hl_reserve_va_block() - reserve a virtual block of a given size.
* @hdev: pointer to the habanalabs device structure. * @hdev: pointer to the habanalabs device structure.
* @ctx: current context * @ctx: current context
@ -1024,7 +1116,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
hint_addr = args->map_device.hint_addr; hint_addr = args->map_device.hint_addr;
/* DRAM VA alignment is the same as the DRAM page size */ /* DRAM VA alignment is the same as the MMU page size */
va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM]; va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
va_block_align = hdev->asic_prop.dmmu.page_size; va_block_align = hdev->asic_prop.dmmu.page_size;
} }
@ -1129,6 +1221,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
bool ctx_free) bool ctx_free)
{ {
struct hl_device *hdev = ctx->hdev; struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
struct hl_vm_hash_node *hnode = NULL; struct hl_vm_hash_node *hnode = NULL;
struct hl_userptr *userptr = NULL; struct hl_userptr *userptr = NULL;
@ -1192,7 +1285,13 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
goto mapping_cnt_err; goto mapping_cnt_err;
} }
vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); if (!is_userptr && !is_power_of_2(phys_pg_pack->page_size))
vaddr = prop->dram_base_address +
DIV_ROUND_DOWN_ULL(vaddr - prop->dram_base_address,
phys_pg_pack->page_size) *
phys_pg_pack->page_size;
else
vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
mutex_lock(&ctx->mmu_lock); mutex_lock(&ctx->mmu_lock);
@ -1637,16 +1736,22 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
INIT_LIST_HEAD(&va_range->list); INIT_LIST_HEAD(&va_range->list);
/* PAGE_SIZE alignment */ /*
* PAGE_SIZE alignment
* it is the caller's responsibility to align the addresses if the
* page size is not a power of 2
*/
if (start & (PAGE_SIZE - 1)) { if (is_power_of_2(page_size)) {
start &= PAGE_MASK; if (start & (PAGE_SIZE - 1)) {
start += PAGE_SIZE; start &= PAGE_MASK;
start += PAGE_SIZE;
}
if (end & (PAGE_SIZE - 1))
end &= PAGE_MASK;
} }
if (end & (PAGE_SIZE - 1))
end &= PAGE_MASK;
if (start >= end) { if (start >= end) {
dev_err(hdev->dev, "too small vm range for va list\n"); dev_err(hdev->dev, "too small vm range for va list\n");
return -EFAULT; return -EFAULT;
@ -1820,7 +1925,8 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
dram_range_start = prop->dmmu.start_addr; dram_range_start = prop->dmmu.start_addr;
dram_range_end = prop->dmmu.end_addr; dram_range_end = prop->dmmu.end_addr;
dram_page_size = prop->dmmu.page_size; dram_page_size = prop->dram_page_size ?
prop->dram_page_size : prop->dmmu.page_size;
host_range_start = prop->pmmu.start_addr; host_range_start = prop->pmmu.start_addr;
host_range_end = prop->pmmu.end_addr; host_range_end = prop->pmmu.end_addr;
host_page_size = prop->pmmu.page_size; host_page_size = prop->pmmu.page_size;
@ -1938,7 +2044,13 @@ int hl_vm_init(struct hl_device *hdev)
struct hl_vm *vm = &hdev->vm; struct hl_vm *vm = &hdev->vm;
int rc; int rc;
vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1); if (is_power_of_2(prop->dram_page_size))
vm->dram_pg_pool =
gen_pool_create(__ffs(prop->dram_page_size), -1);
else
vm->dram_pg_pool =
gen_pool_create(__ffs(DRAM_POOL_PAGE_SIZE), -1);
if (!vm->dram_pg_pool) { if (!vm->dram_pg_pool) {
dev_err(hdev->dev, "Failed to create dram page pool\n"); dev_err(hdev->dev, "Failed to create dram page pool\n");
return -ENOMEM; return -ENOMEM;

View File

@ -166,7 +166,6 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
mmu_prop = &prop->pmmu; mmu_prop = &prop->pmmu;
pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT; pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
/* /*
* The H/W handles mapping of specific page sizes. Hence if the page * The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and unmap them separately. * size is bigger, we break it to sub-pages and unmap them separately.
@ -174,11 +173,21 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
if ((page_size % mmu_prop->page_size) == 0) { if ((page_size % mmu_prop->page_size) == 0) {
real_page_size = mmu_prop->page_size; real_page_size = mmu_prop->page_size;
} else { } else {
dev_err(hdev->dev, /*
"page size of %u is not %uKB aligned, can't unmap\n", * MMU page size may differ from DRAM page size.
page_size, mmu_prop->page_size >> 10); * In such case work with the DRAM page size and let the MMU
* scrambling routine to handle this mismatch when
* calculating the address to remove from the MMU page table
*/
if (is_dram_addr && ((page_size % prop->dram_page_size) == 0)) {
real_page_size = prop->dram_page_size;
} else {
dev_err(hdev->dev,
"page size of %u is not %uKB aligned, can't unmap\n",
page_size, mmu_prop->page_size >> 10);
return -EFAULT; return -EFAULT;
}
} }
npages = page_size / real_page_size; npages = page_size / real_page_size;
@ -253,6 +262,17 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
*/ */
if ((page_size % mmu_prop->page_size) == 0) { if ((page_size % mmu_prop->page_size) == 0) {
real_page_size = mmu_prop->page_size; real_page_size = mmu_prop->page_size;
} else if (is_dram_addr && ((page_size % prop->dram_page_size) == 0) &&
(prop->dram_page_size < mmu_prop->page_size)) {
/*
* MMU page size may differ from DRAM page size.
* In such a case, work with the DRAM page size and let the MMU
* scrambling routine handle this mismatch when calculating
* the address to place in the MMU page table. (In that case,
* also make sure that dram_page_size is smaller than the
* MMU page size.)
*/
real_page_size = prop->dram_page_size;
} else { } else {
dev_err(hdev->dev, dev_err(hdev->dev,
"page size of %u is not %uKB aligned, can't map\n", "page size of %u is not %uKB aligned, can't map\n",
@ -261,10 +281,21 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
return -EFAULT; return -EFAULT;
} }
if (phys_addr & (real_page_size - 1)) /*
* Verify that the phys and virt addresses are aligned with the
* MMU page size (in dram this means checking the address and MMU
* after scrambling)
*/
if ((is_dram_addr &&
((hdev->asic_funcs->scramble_vaddr(hdev, phys_addr) &
(mmu_prop->page_size - 1)) ||
(hdev->asic_funcs->scramble_vaddr(hdev, virt_addr) &
(mmu_prop->page_size - 1)))) ||
(!is_dram_addr && ((phys_addr & (real_page_size - 1)) ||
(virt_addr & (real_page_size - 1)))))
dev_crit(hdev->dev, dev_crit(hdev->dev,
"Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size", "Mapping address 0x%llx with virtual address 0x%llx and page size of 0x%x is erroneous! Addresses must be divisible by page size",
phys_addr, real_page_size); phys_addr, virt_addr, real_page_size);
npages = page_size / real_page_size; npages = page_size / real_page_size;
real_virt_addr = virt_addr; real_virt_addr = virt_addr;
@ -474,6 +505,8 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
if (!hdev->mmu_enable) if (!hdev->mmu_enable)
return -EOPNOTSUPP; return -EOPNOTSUPP;
hops->scrambled_vaddr = virt_addr; /* assume no scrambling */
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->dmmu.start_addr, prop->dmmu.start_addr,
prop->dmmu.end_addr); prop->dmmu.end_addr);
@ -513,3 +546,15 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
return 0; return 0;
} }
/**
 * hl_mmu_scramble_vaddr() - generic (pass-through) virtual address scrambler.
 * @hdev: pointer to device data. Not used by this implementation.
 * @virt_addr: The virtual address to scramble.
 *
 * Default implementation for the scramble_vaddr ASIC callback: it applies
 * no scrambling at all and hands the address back untouched.
 *
 * Return: @virt_addr, unmodified.
 */
u64 hl_mmu_scramble_vaddr(struct hl_device *hdev, u64 virt_addr)
{
	return virt_addr;
}

View File

@ -8308,7 +8308,8 @@ static const struct hl_asic_funcs gaudi_funcs = {
.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw, .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
.get_device_time = gaudi_get_device_time, .get_device_time = gaudi_get_device_time,
.collective_wait_init_cs = gaudi_collective_wait_init_cs, .collective_wait_init_cs = gaudi_collective_wait_init_cs,
.collective_wait_create_jobs = gaudi_collective_wait_create_jobs .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
.scramble_vaddr = hl_mmu_scramble_vaddr
}; };
/** /**

View File

@ -5456,7 +5456,8 @@ static const struct hl_asic_funcs goya_funcs = {
.set_dma_mask_from_fw = goya_set_dma_mask_from_fw, .set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
.get_device_time = goya_get_device_time, .get_device_time = goya_get_device_time,
.collective_wait_init_cs = goya_collective_wait_init_cs, .collective_wait_init_cs = goya_collective_wait_init_cs,
.collective_wait_create_jobs = goya_collective_wait_create_jobs .collective_wait_create_jobs = goya_collective_wait_create_jobs,
.scramble_vaddr = hl_mmu_scramble_vaddr
}; };
/* /*