0411de8548
The ability to use NVIDIA's fuc has been retained *temporarily* in order to better debug any issues that may be lingering in our initial attempt at writing this ucode. Once I'm fairly confident we're okay, it'll be removed. There are a number of things not implemented by this fuc currently, but most of it is sets of state that our context setup would not have used anyway. No doubt we'll find out what they're for at some point, and implement it if required. This has been tested on 0xc0/0xc4 thus far, and from what I could tell it worked as well as NVIDIA's. It's also been tested on 0xc1, but even with NVIDIA's fuc that chipset doesn't work correctly with nouveau yet. 0xc3/0xc8/0xce should in theory be supported too, but I don't have the hardware to check that. There are no doubt numerous bugs to squash yet; please report any! Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
475 lines
11 KiB
Plaintext
475 lines
11 KiB
Plaintext
/* fuc microcode for nvc0 PGRAPH/GPC
 *
 * Copyright 2011 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */
/* To build:
 *    m4 nvc0_grgpc.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grgpc.fuc.h
 */
/* TODO
 * - bracket certain functions with scratch writes, useful for debugging
 * - watchdog timer around ctx operations
 */
// Data segment: per-GPC state.  Everything here starts out as zero and
// is filled in by init, then read back by the context transfer code.
.section nvc0_grgpc_data
// shared definitions; the same file is pulled in a second time from the
// code segment (presumably this first pass only emits its data/macro
// side -- confirm in nvc0_graph.fuc)
include(`nvc0_graph.fuc')
gpc_id:			.b32 0	// MYINDEX value read by init
gpc_mmio_list_head:	.b32 0	// 16-bit list start, copied from the chipset entry
gpc_mmio_list_tail:	.b32 0	// 16-bit list end, copied from the chipset entry

tpc_count:		.b32 0	// UNITS & 0x1f
tpc_mask:		.b32 0	// (1 << tpc_count) - 1
tpc_mmio_list_head:	.b32 0	// 16-bit list start, copied from the chipset entry
tpc_mmio_list_tail:	.b32 0	// 16-bit list end, copied from the chipset entry

// command queue: filled by the interrupt handler, drained by main
cmd_queue:		queue_init
// chipset descriptions
//
// One 12-byte entry per supported chipset, walked by init in steps of 12:
//   +0  b32  chipset id, compared against the value read from CC_SCRATCH[0]
//   +4  b16  start of GPC mmio list
//   +6  b16  end of GPC mmio list
//   +8  b16  start of per-TPC mmio list
//   +10 b16  end of per-TPC mmio list
// The table is terminated by an entry whose chipset id is zero.
chipsets:
.b8  0xc0 0 0 0
.b16 nvc0_gpc_mmio_head
.b16 nvc0_gpc_mmio_tail
.b16 nvc0_tpc_mmio_head
.b16 nvc0_tpc_mmio_tail
.b8  0xc1 0 0 0
.b16 nvc0_gpc_mmio_head
.b16 nvc1_gpc_mmio_tail
.b16 nvc0_tpc_mmio_head
.b16 nvc1_tpc_mmio_tail
.b8  0xc3 0 0 0
.b16 nvc0_gpc_mmio_head
.b16 nvc0_gpc_mmio_tail
.b16 nvc0_tpc_mmio_head
.b16 nvc3_tpc_mmio_tail
.b8  0xc4 0 0 0
.b16 nvc0_gpc_mmio_head
.b16 nvc0_gpc_mmio_tail
.b16 nvc0_tpc_mmio_head
.b16 nvc3_tpc_mmio_tail
.b8  0xc8 0 0 0
.b16 nvc0_gpc_mmio_head
.b16 nvc0_gpc_mmio_tail
.b16 nvc0_tpc_mmio_head
.b16 nvc0_tpc_mmio_tail
.b8  0xce 0 0 0
.b16 nvc0_gpc_mmio_head
.b16 nvc0_gpc_mmio_tail
.b16 nvc0_tpc_mmio_head
.b16 nvc3_tpc_mmio_tail
.b8  0 0 0 0
// GPC mmio lists
//
// Each entry describes a block of GPC registers kept in the mmio context
// (the two arguments are presumably register offset and number of
// consecutive registers -- confirm against the macro in nvc0_graph.fuc).
//
// The lists are laid out back to back so that the chipset table can pick
// a different end label per chipset: the entry between nvc0_gpc_mmio_tail
// and nvc1_gpc_mmio_tail is only covered by the 0xc1 table entry.
nvc0_gpc_mmio_head:
mmctx_data(0x000380, 1)
mmctx_data(0x000400, 6)
mmctx_data(0x000450, 9)
mmctx_data(0x000600, 1)
mmctx_data(0x000684, 1)
mmctx_data(0x000700, 5)
mmctx_data(0x000800, 1)
mmctx_data(0x000808, 3)
mmctx_data(0x000828, 1)
mmctx_data(0x000830, 1)
mmctx_data(0x0008d8, 1)
mmctx_data(0x0008e0, 1)
mmctx_data(0x0008e8, 6)
mmctx_data(0x00091c, 1)
mmctx_data(0x000924, 3)
mmctx_data(0x000b00, 1)
mmctx_data(0x000b08, 6)
mmctx_data(0x000bb8, 1)
mmctx_data(0x000c08, 1)
mmctx_data(0x000c10, 8)
mmctx_data(0x000c80, 1)
mmctx_data(0x000c8c, 1)
mmctx_data(0x001000, 3)
mmctx_data(0x001014, 1)
nvc0_gpc_mmio_tail:
mmctx_data(0x000c6c, 1)
nvc1_gpc_mmio_tail:
// TPC mmio lists
//
// Same entry format as the GPC lists, with offsets relative to a TPC's
// register block (the transfer code supplies the TPC0 base and a stride).
//
// Three end labels are nested so the chipset table can select how much
// of the trailing part applies:
//   nvc0_tpc_mmio_tail - 0xc0, 0xc8
//   nvc3_tpc_mmio_tail - 0xc3, 0xc4, 0xce (adds 0x758, 0x2c4, 0x4bc, 0x6e0)
//   nvc1_tpc_mmio_tail - 0xc1 (additionally adds 0x544)
nvc0_tpc_mmio_head:
mmctx_data(0x000018, 1)
mmctx_data(0x00003c, 1)
mmctx_data(0x000048, 1)
mmctx_data(0x000064, 1)
mmctx_data(0x000088, 1)
mmctx_data(0x000200, 6)
mmctx_data(0x00021c, 2)
mmctx_data(0x000300, 6)
mmctx_data(0x0003d0, 1)
mmctx_data(0x0003e0, 2)
mmctx_data(0x000400, 3)
mmctx_data(0x000420, 1)
mmctx_data(0x0004b0, 1)
mmctx_data(0x0004e8, 1)
mmctx_data(0x0004f4, 1)
mmctx_data(0x000520, 2)
mmctx_data(0x000604, 4)
mmctx_data(0x000644, 20)
mmctx_data(0x000698, 1)
mmctx_data(0x000750, 2)
nvc0_tpc_mmio_tail:
mmctx_data(0x000758, 1)
mmctx_data(0x0002c4, 1)
mmctx_data(0x0004bc, 1)
mmctx_data(0x0006e0, 1)
nvc3_tpc_mmio_tail:
mmctx_data(0x000544, 1)
nvc1_tpc_mmio_tail:
// Code segment.  The first instruction branches over the shared helper
// code that follows, straight to init.
.section nvc0_grgpc_code
bra init
// second inclusion of the shared file, this time with the guard macro
// defined (presumably that is what makes it emit its code -- confirm in
// nvc0_graph.fuc)
define(`include_code')
include(`nvc0_graph.fuc')
// reports an exception to the host
//
// In: $r15 error code (see nvc0_graph.fuc)
//
// $r14 is preserved; $r15 is overwritten with 1.  Anything nv_wr32
// itself clobbers is not saved here.
//
error:
	push $r14
	mov $r14 -0x67ec	// 0x9814
	sethi $r14 0x400000	// $r14 = 0x409814
	call nv_wr32		// HUB_CTXCTL_CC_SCRATCH[5] = error code
	// NOTE(review): 0x409814 + 0x41c = 0x409c30; confirm that this is
	// really the address of the register named in the comment below
	// (this also relies on nv_wr32 leaving $r14 untouched)
	add b32 $r14 0x41c
	mov $r15 1
	call nv_wr32		// HUB_CTXCTL_INTR_UP_SET
	pop $r14
	ret
// GPC fuc initialisation, executed by triggering ucode start, will
// fall through to main loop after completion.
//
// Input:
//   CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
//   CC_SCRATCH[1]: context base
//
// Output:
//   CC_SCRATCH[0]:
//	     31:31: set to signal completion
//   CC_SCRATCH[1]:
//	      31:0: GPC context size
//
// Register use inside this routine:
//   $r0  constant zero (also used as base for data segment accesses)
//   $r1  pointer to the matching chipset table entry
//   $r2  running context base
//   $r3  running GPC context size
//   $r4  I/O address scratch
//
init:
	clear b32 $r0
	mov $sp $r0

	// enable fifo access
	mov $r1 0x1200
	mov $r2 2
	iowr I[$r1 + 0x000] $r2		// FIFO_ENABLE

	// setup i0 handler, and route all interrupts to it
	mov $r1 ih
	mov $iv0 $r1
	mov $r1 0x400
	iowr I[$r1 + 0x300] $r0		// INTR_DISPATCH

	// enable fifo interrupt
	mov $r2 4
	iowr I[$r1 + 0x000] $r2		// INTR_EN_SET

	// enable interrupts
	bset $flags ie0

	// figure out which GPC we are, and how many TPCs we have
	mov $r1 0x608
	shl b32 $r1 6
	iord $r2 I[$r1 + 0x000]		// UNITS
	mov $r3 1
	and $r2 0x1f			// TPC count is the low 5 bits
	shl b32 $r3 $r2
	sub b32 $r3 1			// mask = (1 << count) - 1
	st b32 D[$r0 + tpc_count] $r2
	st b32 D[$r0 + tpc_mask] $r3
	add b32 $r1 0x400
	iord $r2 I[$r1 + 0x000]		// MYINDEX
	st b32 D[$r0 + gpc_id] $r2

	// find context data for this chipset
	mov $r2 0x800
	shl b32 $r2 6
	iord $r2 I[$r2 + 0x000]		// CC_SCRATCH[0]
	mov $r1 chipsets - 12		// pre-decrement, loop adds 12 first
	init_find_chipset:
		add b32 $r1 12
		ld b32 $r3 D[$r1 + 0x00]
		cmpu b32 $r3 $r2
		bra e init_context
		cmpu b32 $r3 0		// zero id terminates the table
		bra ne init_find_chipset
		// unknown chipset
		// NOTE(review): init is entered with a branch and $sp was
		// just set to zero, so nothing has pushed a return address
		// for this ret -- confirm the intended behaviour here
		ret

	// initialise context base, and size tracking
	init_context:
	mov $r2 0x800
	shl b32 $r2 6
	iord $r2 I[$r2 + 0x100]		// CC_SCRATCH[1], initial base
	clear b32 $r3			// track GPC context size here

	// set mmctx base addresses now so we don't have to do it later,
	// they don't currently ever change
	mov $r4 0x700
	shl b32 $r4 6
	shr b32 $r5 $r2 8		// base in units of 256 bytes
	iowr I[$r4 + 0x000] $r5		// MMCTX_SAVE_SWBASE
	iowr I[$r4 + 0x100] $r5		// MMCTX_LOAD_SWBASE

	// calculate GPC mmio context size, store the chipset-specific
	// mmio list pointers somewhere we can get at them later without
	// re-parsing the chipset list
	//
	// (mmctx_size appears to take list start/end in $r14/$r15 and hand
	// the size back in $r15 -- see nvc0_graph.fuc)
	clear b32 $r14
	clear b32 $r15
	ld b16 $r14 D[$r1 + 4]
	ld b16 $r15 D[$r1 + 6]
	st b16 D[$r0 + gpc_mmio_list_head] $r14
	st b16 D[$r0 + gpc_mmio_list_tail] $r15
	call mmctx_size
	add b32 $r2 $r15
	add b32 $r3 $r15

	// calculate per-TPC mmio context size, store the list pointers
	ld b16 $r14 D[$r1 + 8]
	ld b16 $r15 D[$r1 + 10]
	st b16 D[$r0 + tpc_mmio_list_head] $r14
	st b16 D[$r0 + tpc_mmio_list_tail] $r15
	call mmctx_size
	ld b32 $r14 D[$r0 + tpc_count]
	mulu $r14 $r15			// per-TPC size times number of TPCs
	add b32 $r2 $r14
	add b32 $r3 $r14

	// round up base/size to 256 byte boundary (for strand SWBASE)
	//
	// NOTE(review): this is (x >> 8) + 1, then << 8, so a value that is
	// already a multiple of 256 still grows by one full unit
	add b32 $r4 0x1300
	shr b32 $r3 2
	iowr I[$r4 + 0x000] $r3		// MMCTX_LOAD_COUNT, wtf for?!?
	shr b32 $r2 8
	shr b32 $r3 6			// 2 + 6 = total shift of 8
	add b32 $r2 1
	add b32 $r3 1
	shl b32 $r2 8
	shl b32 $r3 8

	// calculate size of strand context data
	mov b32 $r15 $r2
	call strand_ctx_init
	add b32 $r3 $r15

	// save context size, and tell HUB we're done
	mov $r1 0x800
	shl b32 $r1 6
	iowr I[$r1 + 0x100] $r3		// CC_SCRATCH[1]  = context size
	add b32 $r1 0x800
	clear b32 $r2
	bset $r2 31
	iowr I[$r1 + 0x000] $r2		// CC_SCRATCH[0] |= 0x80000000
// Main program loop, very simple, sleeps until woken up by the interrupt
// handler, pulls a command from the queue and executes its handler
//
// Commands 0-3 are context transfers: bit 0 of the command becomes $p1
// and bit 1 becomes $p2 before ctx_xfer is called.  Anything else is
// reported to the host through error.
//
main:
	// sleep until the interrupt handler clears $p0
	bset $flags $p0
	sleep $p0
	mov $r13 cmd_queue
	call queue_get
	// presumably $p1 is set by queue_get when there was nothing to
	// fetch -- confirm in nvc0_graph.fuc
	bra $p1 main

	// 0x0000-0x0003 are all context transfers
	cmpu b32 $r14 0x04
	bra nc main_not_ctx_xfer
	// fetch $flags and mask off $p1/$p2
	mov $r1 $flags
	mov $r2 0x0006
	not b32 $r2
	and $r1 $r2
	// set $p1/$p2 according to transfer type
	shl b32 $r14 1
	or $r1 $r14
	mov $flags $r1
	// transfer context data
	call ctx_xfer
	bra main

main_not_ctx_xfer:
	// error code = (command << 16) | bad-command code
	shl b32 $r15 $r14 16
	or $r15 E_BAD_COMMAND
	call error
	bra main
// interrupt handler
//
// Saves $flags and the registers it works with, queues an incoming fifo
// command if one is pending, acknowledges every interrupt bit that was
// read, then clears $p0 so that the sleep in main ends.
//
ih:
	push $r8
	mov $r8 $flags
	push $r8			// saved $flags
	push $r9
	push $r10
	push $r11
	push $r13
	push $r14
	push $r15

	// incoming fifo command?
	iord $r10 I[$r0 + 0x200]	// INTR
	and $r11 $r10 0x00000004
	bra e ih_no_fifo
		// queue incoming fifo command for later processing
		mov $r11 0x1900
		mov $r13 cmd_queue
		iord $r14 I[$r11 + 0x100]	// FIFO_CMD
		iord $r15 I[$r11 + 0x000]	// FIFO_DATA
		call queue_put
		add b32 $r11 0x400
		mov $r14 1
		iowr I[$r11 + 0x000] $r14	// FIFO_ACK

	// ack, and wake up main()
	ih_no_fifo:
	iowr I[$r0 + 0x100] $r10	// INTR_ACK

	// restore in reverse order of the pushes above
	pop $r15
	pop $r14
	pop $r13
	pop $r11
	pop $r10
	pop $r9
	pop $r8
	mov $flags $r8
	pop $r8
	// must come after $flags is restored, otherwise the restore would
	// bring back the $p0 value main set before sleeping
	bclr $flags $p0
	iret
// Set this GPC's bit in HUB_BAR, used to signal completion of various
// activities to the HUB fuc
//
// Overwrites $r14 and $r15 (plus whatever nv_wr32 itself uses).
//
hub_barrier_done:
	mov $r15 1
	ld b32 $r14 D[$r0 + gpc_id]
	shl b32 $r15 $r14		// value = 1 << gpc_id
	mov $r14 -0x6be8		// 0x409418 - HUB_BAR_SET
	sethi $r14 0x400000
	call nv_wr32
	ret
// Disables various things, waits a bit, and re-enables them..
//
// Not sure how exactly this helps, perhaps "ENABLE" is not such a
// good description for the bits we turn off?  Anyways, without this,
// funny things happen.
//
// Overwrites $r14 and $r15.
//
ctx_redswitch:
	mov $r14 0x614
	shl b32 $r14 6
	mov $r15 0x020
	iowr I[$r14] $r15	// GPC_RED_SWITCH = POWER
	// short busy-wait: count $r15 down from 8 to 0
	mov $r15 8
	ctx_redswitch_delay:
		sub b32 $r15 1
		bra ne ctx_redswitch_delay
	mov $r15 0xa20
	iowr I[$r14] $r15	// GPC_RED_SWITCH = UNK11, ENABLE, POWER
	ret
// Transfer GPC context data between GPU and storage area
//
// In: $r15 context base address
//     $p1 clear on save, set on load
//     $p2 set if opposite direction done/will be done, so:
//		on save it means: "a load will follow this save"
//		on load it means: "a save preceded this load"
//
// Sequence: set the memory base, (on load only) cycle the red switch,
// kick off the strand transfer, move the GPC and per-TPC mmio context,
// wait for the strands, optionally issue the closing strand command,
// then signal the HUB barrier.
//
ctx_xfer:
	// set context base address
	mov $r1 0xa04
	shl b32 $r1 6
	iowr I[$r1 + 0x000] $r15	// MEM_BASE
	bra not $p1 ctx_xfer_not_load
		call ctx_redswitch
	ctx_xfer_not_load:

	// strands
	mov $r1 0x4afc
	sethi $r1 0x20000
	mov $r2 0xc
	iowr I[$r1] $r2			// STRAND_CMD(0x3f) = 0x0c
	call strand_wait
	mov $r2 0x47fc
	sethi $r2 0x20000
	iowr I[$r2] $r0			// STRAND_FIRST_GENE(0x3f) = 0x00
	xbit $r2 $flags $p1		// 0 on save, 1 on load
	add b32 $r2 3
	iowr I[$r1] $r2			// STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)

	// mmio context
	xbit $r10 $flags $p1		// direction
	or $r10 2			// first
	mov $r11 0x0000
	sethi $r11 0x500000
	ld b32 $r12 D[$r0 + gpc_id]
	shl b32 $r12 15			// each GPC is 0x8000 apart
	add b32 $r11 $r12		// base = NV_PGRAPH_GPCn
	ld b32 $r12 D[$r0 + gpc_mmio_list_head]
	ld b32 $r13 D[$r0 + gpc_mmio_list_tail]
	mov $r14 0			// not multi
	call mmctx_xfer

	// per-TPC mmio context
	xbit $r10 $flags $p1		// direction
	or $r10 4			// last
	mov $r11 0x4000
	sethi $r11 0x500000		// base = NV_PGRAPH_GPC0_TPC0
	ld b32 $r12 D[$r0 + gpc_id]
	shl b32 $r12 15
	add b32 $r11 $r12		// base = NV_PGRAPH_GPCn_TPC0
	ld b32 $r12 D[$r0 + tpc_mmio_list_head]
	ld b32 $r13 D[$r0 + tpc_mmio_list_tail]
	ld b32 $r15 D[$r0 + tpc_mask]
	mov $r14 0x800			// stride = 0x800
	call mmctx_xfer

	// wait for strands to finish
	call strand_wait

	// if load, or a save without a load following, do some
	// unknown stuff that's done after finishing a block of
	// strand commands
	//
	// NOTE(review): as coded, the post step is skipped only for a save
	// with $p2 clear.  Going by the $p2 description in the header that
	// is a save WITHOUT a load following, which is the opposite of the
	// wording above -- confirm whether the comment or the branch is the
	// intended behaviour.
	bra $p1 ctx_xfer_post
	bra not $p2 ctx_xfer_done
	ctx_xfer_post:
		mov $r1 0x4afc
		sethi $r1 0x20000
		mov $r2 0xd
		iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x0d
		call strand_wait

	// mark completion in HUB's barrier
	ctx_xfer_done:
	call hub_barrier_done
	ret
// pad the end of the code segment out to a 256-byte boundary
.align 256