drm/i915: Record batch buffer following GPU error
In order to improve our diagnostic capabilities following a GPU hang and
subsequent reset, we need to record the batch buffer that triggered the error.
We assume that the current batch buffer, plus a few details about what else is
on the active list, will be sufficient -- at the very least an improvement over
nothing.

The extra information is stored in /debug/dri/.../i915_error_state following an
error, and may be decoded using intel_gpu_tools/tools/intel_error_decode.

v2: Avoid excessive work under spinlocks.
v3: Include ringbuffer for later analysis.
v4: Use kunmap correctly and record more buffer state.
v5: Search ringbuffer for current batch buffer.
v6: Use a work fn for the impossible IRQ error case.
v7: Avoid non-atomic paths whilst in IRQ context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
parent
7b9c5abee9
commit
9df30794f6
@ -350,6 +350,36 @@ static int i915_ringbuffer_info(struct seq_file *m, void *data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Translate a recorded pin state into the suffix shown for a buffer in the
 * error-state dump.
 *
 * The capture code stores 1 when the object's pin_count is positive and -1
 * when its user_pin_count is positive; 0 means neither.
 *
 * Returns " P" for a positive value, " p" for a negative value and an empty
 * string for zero.  The returned strings are static; do not free them.
 */
static const char *pin_flag(int pinned)
{
	if (pinned > 0)
		return " P";
	if (pinned < 0)
		return " p";
	return "";
}
|
||||
|
||||
static const char *tiling_flag(int tiling)
|
||||
{
|
||||
switch (tiling) {
|
||||
default:
|
||||
case I915_TILING_NONE: return "";
|
||||
case I915_TILING_X: return " X";
|
||||
case I915_TILING_Y: return " Y";
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Suffix shown in the error-state dump for a buffer whose dirty bit was set
 * at capture time: " dirty" for any non-zero value, otherwise "".
 */
static const char *dirty_flag(int dirty)
{
	if (dirty)
		return " dirty";
	return "";
}
|
||||
|
||||
/*
 * Suffix shown in the error-state dump for a buffer recorded as purgeable
 * (the capture code sets this when madv != I915_MADV_WILLNEED):
 * " purgeable" for any non-zero value, otherwise "".
 */
static const char *purgeable_flag(int purgeable)
{
	if (purgeable)
		return " purgeable";
	return "";
}
|
||||
|
||||
static int i915_error_state(struct seq_file *m, void *unused)
|
||||
{
|
||||
struct drm_info_node *node = (struct drm_info_node *) m->private;
|
||||
@ -357,6 +387,7 @@ static int i915_error_state(struct seq_file *m, void *unused)
|
||||
drm_i915_private_t *dev_priv = dev->dev_private;
|
||||
struct drm_i915_error_state *error;
|
||||
unsigned long flags;
|
||||
int i, page, offset, elt;
|
||||
|
||||
spin_lock_irqsave(&dev_priv->error_lock, flags);
|
||||
if (!dev_priv->first_error) {
|
||||
@ -368,6 +399,7 @@ static int i915_error_state(struct seq_file *m, void *unused)
|
||||
|
||||
seq_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec,
|
||||
error->time.tv_usec);
|
||||
seq_printf(m, "PCI ID: 0x%04x\n", dev->pci_device);
|
||||
seq_printf(m, "EIR: 0x%08x\n", error->eir);
|
||||
seq_printf(m, " PGTBL_ER: 0x%08x\n", error->pgtbl_er);
|
||||
seq_printf(m, " INSTPM: 0x%08x\n", error->instpm);
|
||||
@ -379,6 +411,59 @@ static int i915_error_state(struct seq_file *m, void *unused)
|
||||
seq_printf(m, " INSTPS: 0x%08x\n", error->instps);
|
||||
seq_printf(m, " INSTDONE1: 0x%08x\n", error->instdone1);
|
||||
}
|
||||
seq_printf(m, "seqno: 0x%08x\n", error->seqno);
|
||||
|
||||
if (error->active_bo_count) {
|
||||
seq_printf(m, "Buffers [%d]:\n", error->active_bo_count);
|
||||
|
||||
for (i = 0; i < error->active_bo_count; i++) {
|
||||
seq_printf(m, " %08x %8zd %08x %08x %08x%s%s%s%s",
|
||||
error->active_bo[i].gtt_offset,
|
||||
error->active_bo[i].size,
|
||||
error->active_bo[i].read_domains,
|
||||
error->active_bo[i].write_domain,
|
||||
error->active_bo[i].seqno,
|
||||
pin_flag(error->active_bo[i].pinned),
|
||||
tiling_flag(error->active_bo[i].tiling),
|
||||
dirty_flag(error->active_bo[i].dirty),
|
||||
purgeable_flag(error->active_bo[i].purgeable));
|
||||
|
||||
if (error->active_bo[i].name)
|
||||
seq_printf(m, " (name: %d)", error->active_bo[i].name);
|
||||
if (error->active_bo[i].fence_reg != I915_FENCE_REG_NONE)
|
||||
seq_printf(m, " (fence: %d)", error->active_bo[i].fence_reg);
|
||||
|
||||
seq_printf(m, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(error->batchbuffer); i++) {
|
||||
if (error->batchbuffer[i]) {
|
||||
struct drm_i915_error_object *obj = error->batchbuffer[i];
|
||||
|
||||
seq_printf(m, "--- gtt_offset = 0x%08x\n", obj->gtt_offset);
|
||||
offset = 0;
|
||||
for (page = 0; page < obj->page_count; page++) {
|
||||
for (elt = 0; elt < PAGE_SIZE/4; elt++) {
|
||||
seq_printf(m, "%08x : %08x\n", offset, obj->pages[page][elt]);
|
||||
offset += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (error->ringbuffer) {
|
||||
struct drm_i915_error_object *obj = error->ringbuffer;
|
||||
|
||||
seq_printf(m, "--- ringbuffer = 0x%08x\n", obj->gtt_offset);
|
||||
offset = 0;
|
||||
for (page = 0; page < obj->page_count; page++) {
|
||||
for (elt = 0; elt < PAGE_SIZE/4; elt++) {
|
||||
seq_printf(m, "%08x : %08x\n", offset, obj->pages[page][elt]);
|
||||
offset += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&dev_priv->error_lock, flags);
|
||||
|
@ -1644,6 +1644,8 @@ int i915_driver_unload(struct drm_device *dev)
|
||||
{
|
||||
struct drm_i915_private *dev_priv = dev->dev_private;
|
||||
|
||||
i915_destroy_error_state(dev);
|
||||
|
||||
destroy_workqueue(dev_priv->wq);
|
||||
del_timer_sync(&dev_priv->hangcheck_timer);
|
||||
|
||||
|
@ -150,7 +150,27 @@ struct drm_i915_error_state {
|
||||
u32 instps;
|
||||
u32 instdone1;
|
||||
u32 seqno;
|
||||
u64 bbaddr;
|
||||
struct timeval time;
|
||||
struct drm_i915_error_object {
|
||||
int page_count;
|
||||
u32 gtt_offset;
|
||||
u32 *pages[0];
|
||||
} *ringbuffer, *batchbuffer[2];
|
||||
struct drm_i915_error_buffer {
|
||||
size_t size;
|
||||
u32 name;
|
||||
u32 seqno;
|
||||
u32 gtt_offset;
|
||||
u32 read_domains;
|
||||
u32 write_domain;
|
||||
u32 fence_reg;
|
||||
s32 pinned:2;
|
||||
u32 tiling:2;
|
||||
u32 dirty:1;
|
||||
u32 purgeable:1;
|
||||
} *active_bo;
|
||||
u32 active_bo_count;
|
||||
};
|
||||
|
||||
struct drm_i915_display_funcs {
|
||||
@ -778,6 +798,7 @@ extern int i965_reset(struct drm_device *dev, u8 flags);
|
||||
|
||||
/* i915_irq.c */
|
||||
void i915_hangcheck_elapsed(unsigned long data);
|
||||
void i915_destroy_error_state(struct drm_device *dev);
|
||||
extern int i915_irq_emit(struct drm_device *dev, void *data,
|
||||
struct drm_file *file_priv);
|
||||
extern int i915_irq_wait(struct drm_device *dev, void *data,
|
||||
|
@ -432,6 +432,121 @@ static void i915_error_work_func(struct work_struct *work)
|
||||
}
|
||||
}
|
||||
|
||||
static struct drm_i915_error_object *
|
||||
i915_error_object_create(struct drm_device *dev,
|
||||
struct drm_gem_object *src)
|
||||
{
|
||||
struct drm_i915_error_object *dst;
|
||||
struct drm_i915_gem_object *src_priv;
|
||||
int page, page_count;
|
||||
|
||||
if (src == NULL)
|
||||
return NULL;
|
||||
|
||||
src_priv = src->driver_private;
|
||||
if (src_priv->pages == NULL)
|
||||
return NULL;
|
||||
|
||||
page_count = src->size / PAGE_SIZE;
|
||||
|
||||
dst = kmalloc(sizeof(*dst) + page_count * sizeof (u32 *), GFP_ATOMIC);
|
||||
if (dst == NULL)
|
||||
return NULL;
|
||||
|
||||
for (page = 0; page < page_count; page++) {
|
||||
void *s, *d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
|
||||
if (d == NULL)
|
||||
goto unwind;
|
||||
s = kmap_atomic(src_priv->pages[page], KM_USER0);
|
||||
memcpy(d, s, PAGE_SIZE);
|
||||
kunmap_atomic(s, KM_USER0);
|
||||
dst->pages[page] = d;
|
||||
}
|
||||
dst->page_count = page_count;
|
||||
dst->gtt_offset = src_priv->gtt_offset;
|
||||
|
||||
return dst;
|
||||
|
||||
unwind:
|
||||
while (page--)
|
||||
kfree(dst->pages[page]);
|
||||
kfree(dst);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
i915_error_object_free(struct drm_i915_error_object *obj)
|
||||
{
|
||||
int page;
|
||||
|
||||
if (obj == NULL)
|
||||
return;
|
||||
|
||||
for (page = 0; page < obj->page_count; page++)
|
||||
kfree(obj->pages[page]);
|
||||
|
||||
kfree(obj);
|
||||
}
|
||||
|
||||
static void
|
||||
i915_error_state_free(struct drm_device *dev,
|
||||
struct drm_i915_error_state *error)
|
||||
{
|
||||
i915_error_object_free(error->batchbuffer[0]);
|
||||
i915_error_object_free(error->batchbuffer[1]);
|
||||
i915_error_object_free(error->ringbuffer);
|
||||
kfree(error->active_bo);
|
||||
kfree(error);
|
||||
}
|
||||
|
||||
static u32
|
||||
i915_get_bbaddr(struct drm_device *dev, u32 *ring)
|
||||
{
|
||||
u32 cmd;
|
||||
|
||||
if (IS_I830(dev) || IS_845G(dev))
|
||||
cmd = MI_BATCH_BUFFER;
|
||||
else if (IS_I965G(dev))
|
||||
cmd = (MI_BATCH_BUFFER_START | (2 << 6) |
|
||||
MI_BATCH_NON_SECURE_I965);
|
||||
else
|
||||
cmd = (MI_BATCH_BUFFER_START | (2 << 6));
|
||||
|
||||
return ring[0] == cmd ? ring[1] : 0;
|
||||
}
|
||||
|
||||
static u32
|
||||
i915_ringbuffer_last_batch(struct drm_device *dev)
|
||||
{
|
||||
struct drm_i915_private *dev_priv = dev->dev_private;
|
||||
u32 head, bbaddr;
|
||||
u32 *ring;
|
||||
|
||||
/* Locate the current position in the ringbuffer and walk back
|
||||
* to find the most recently dispatched batch buffer.
|
||||
*/
|
||||
bbaddr = 0;
|
||||
head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
|
||||
ring = (u32 *)(dev_priv->ring.virtual_start + head);
|
||||
|
||||
while (--ring >= (u32 *)dev_priv->ring.virtual_start) {
|
||||
bbaddr = i915_get_bbaddr(dev, ring);
|
||||
if (bbaddr)
|
||||
break;
|
||||
}
|
||||
|
||||
if (bbaddr == 0) {
|
||||
ring = (u32 *)(dev_priv->ring.virtual_start + dev_priv->ring.Size);
|
||||
while (--ring >= (u32 *)dev_priv->ring.virtual_start) {
|
||||
bbaddr = i915_get_bbaddr(dev, ring);
|
||||
if (bbaddr)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return bbaddr;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_capture_error_state - capture an error record for later analysis
|
||||
* @dev: drm device
|
||||
@ -444,19 +559,26 @@ static void i915_error_work_func(struct work_struct *work)
|
||||
static void i915_capture_error_state(struct drm_device *dev)
|
||||
{
|
||||
struct drm_i915_private *dev_priv = dev->dev_private;
|
||||
struct drm_i915_gem_object *obj_priv;
|
||||
struct drm_i915_error_state *error;
|
||||
struct drm_gem_object *batchbuffer[2];
|
||||
unsigned long flags;
|
||||
u32 bbaddr;
|
||||
int count;
|
||||
|
||||
spin_lock_irqsave(&dev_priv->error_lock, flags);
|
||||
if (dev_priv->first_error)
|
||||
goto out;
|
||||
error = dev_priv->first_error;
|
||||
spin_unlock_irqrestore(&dev_priv->error_lock, flags);
|
||||
if (error)
|
||||
return;
|
||||
|
||||
error = kmalloc(sizeof(*error), GFP_ATOMIC);
|
||||
if (!error) {
|
||||
DRM_DEBUG_DRIVER("out ot memory, not capturing error state\n");
|
||||
goto out;
|
||||
DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
|
||||
return;
|
||||
}
|
||||
|
||||
error->seqno = i915_get_gem_seqno(dev);
|
||||
error->eir = I915_READ(EIR);
|
||||
error->pgtbl_er = I915_READ(PGTBL_ER);
|
||||
error->pipeastat = I915_READ(PIPEASTAT);
|
||||
@ -467,6 +589,7 @@ static void i915_capture_error_state(struct drm_device *dev)
|
||||
error->ipehr = I915_READ(IPEHR);
|
||||
error->instdone = I915_READ(INSTDONE);
|
||||
error->acthd = I915_READ(ACTHD);
|
||||
error->bbaddr = 0;
|
||||
} else {
|
||||
error->ipeir = I915_READ(IPEIR_I965);
|
||||
error->ipehr = I915_READ(IPEHR_I965);
|
||||
@ -474,14 +597,101 @@ static void i915_capture_error_state(struct drm_device *dev)
|
||||
error->instps = I915_READ(INSTPS);
|
||||
error->instdone1 = I915_READ(INSTDONE1);
|
||||
error->acthd = I915_READ(ACTHD_I965);
|
||||
error->bbaddr = I915_READ64(BB_ADDR);
|
||||
}
|
||||
|
||||
bbaddr = i915_ringbuffer_last_batch(dev);
|
||||
|
||||
/* Grab the current batchbuffer, most likely to have crashed. */
|
||||
batchbuffer[0] = NULL;
|
||||
batchbuffer[1] = NULL;
|
||||
count = 0;
|
||||
list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
|
||||
struct drm_gem_object *obj = obj_priv->obj;
|
||||
|
||||
if (batchbuffer[0] == NULL &&
|
||||
bbaddr >= obj_priv->gtt_offset &&
|
||||
bbaddr < obj_priv->gtt_offset + obj->size)
|
||||
batchbuffer[0] = obj;
|
||||
|
||||
if (batchbuffer[1] == NULL &&
|
||||
error->acthd >= obj_priv->gtt_offset &&
|
||||
error->acthd < obj_priv->gtt_offset + obj->size &&
|
||||
batchbuffer[0] != obj)
|
||||
batchbuffer[1] = obj;
|
||||
|
||||
count++;
|
||||
}
|
||||
|
||||
/* We need to copy these to an anonymous buffer as the simplest
|
||||
* method to avoid being overwritten by userspace.
|
||||
*/
|
||||
error->batchbuffer[0] = i915_error_object_create(dev, batchbuffer[0]);
|
||||
error->batchbuffer[1] = i915_error_object_create(dev, batchbuffer[1]);
|
||||
|
||||
/* Record the ringbuffer */
|
||||
error->ringbuffer = i915_error_object_create(dev, dev_priv->ring.ring_obj);
|
||||
|
||||
/* Record buffers on the active list. */
|
||||
error->active_bo = NULL;
|
||||
error->active_bo_count = 0;
|
||||
|
||||
if (count)
|
||||
error->active_bo = kmalloc(sizeof(*error->active_bo)*count,
|
||||
GFP_ATOMIC);
|
||||
|
||||
if (error->active_bo) {
|
||||
int i = 0;
|
||||
list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
|
||||
struct drm_gem_object *obj = obj_priv->obj;
|
||||
|
||||
error->active_bo[i].size = obj->size;
|
||||
error->active_bo[i].name = obj->name;
|
||||
error->active_bo[i].seqno = obj_priv->last_rendering_seqno;
|
||||
error->active_bo[i].gtt_offset = obj_priv->gtt_offset;
|
||||
error->active_bo[i].read_domains = obj->read_domains;
|
||||
error->active_bo[i].write_domain = obj->write_domain;
|
||||
error->active_bo[i].fence_reg = obj_priv->fence_reg;
|
||||
error->active_bo[i].pinned = 0;
|
||||
if (obj_priv->pin_count > 0)
|
||||
error->active_bo[i].pinned = 1;
|
||||
if (obj_priv->user_pin_count > 0)
|
||||
error->active_bo[i].pinned = -1;
|
||||
error->active_bo[i].tiling = obj_priv->tiling_mode;
|
||||
error->active_bo[i].dirty = obj_priv->dirty;
|
||||
error->active_bo[i].purgeable = obj_priv->madv != I915_MADV_WILLNEED;
|
||||
|
||||
if (++i == count)
|
||||
break;
|
||||
}
|
||||
error->active_bo_count = i;
|
||||
}
|
||||
|
||||
do_gettimeofday(&error->time);
|
||||
|
||||
dev_priv->first_error = error;
|
||||
|
||||
out:
|
||||
spin_lock_irqsave(&dev_priv->error_lock, flags);
|
||||
if (dev_priv->first_error == NULL) {
|
||||
dev_priv->first_error = error;
|
||||
error = NULL;
|
||||
}
|
||||
spin_unlock_irqrestore(&dev_priv->error_lock, flags);
|
||||
|
||||
if (error)
|
||||
i915_error_state_free(dev, error);
|
||||
}
|
||||
|
||||
void i915_destroy_error_state(struct drm_device *dev)
|
||||
{
|
||||
struct drm_i915_private *dev_priv = dev->dev_private;
|
||||
struct drm_i915_error_state *error;
|
||||
|
||||
spin_lock(&dev_priv->error_lock);
|
||||
error = dev_priv->first_error;
|
||||
dev_priv->first_error = NULL;
|
||||
spin_unlock(&dev_priv->error_lock);
|
||||
|
||||
if (error)
|
||||
i915_error_state_free(dev, error);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -328,6 +328,7 @@
|
||||
#define CM0_COLOR_EVICT_DISABLE (1<<3)
|
||||
#define CM0_DEPTH_WRITE_DISABLE (1<<1)
|
||||
#define CM0_RC_OP_FLUSH_DISABLE (1<<0)
|
||||
#define BB_ADDR 0x02140 /* 8 bytes */
|
||||
#define GFX_FLSH_CNTL 0x02170 /* 915+ only */
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user