Merge changes Ide272fd6,Ia2a5c9b0,I373de721,I68cbe5cc,I0fbd6578,I3f0350af,I7123f6b5,Icab1f4e5,Icffe3482 into msm-3.4
* changes:
  - msm: kgsl: In recovery search for command stream after global eop
  - msm: kgsl: Try to replay commands from bad context in recovery
  - msm: kgsl: Expire timestamps after recovery
  - msm: kgsl: Mark the hung context in recovery before extraction
  - msm: kgsl: Write a separate function to set the reset status
  - msm: kgsl: Do not restore per context timestamp states
  - msm: kgsl: Turn on preamble to enable replay of commands
  - msm: kgsl: Separate function to detect last command in recovery
  - msm: kgsl: Create a separate function to extract valid commands
This commit is contained in:
commit
19ffe56a37
|
@ -796,121 +796,59 @@ static int adreno_stop(struct kgsl_device *device)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static void adreno_mark_context_status(struct kgsl_device *device,
|
||||||
adreno_recover_hang(struct kgsl_device *device,
|
int recovery_status)
|
||||||
struct adreno_recovery_data *rec_data)
|
|
||||||
{
|
{
|
||||||
int ret;
|
|
||||||
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
|
|
||||||
struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
|
|
||||||
unsigned int timestamp;
|
|
||||||
unsigned int reftimestamp;
|
|
||||||
unsigned int enable_ts;
|
|
||||||
unsigned int soptimestamp;
|
|
||||||
unsigned int eoptimestamp;
|
|
||||||
struct kgsl_context *context;
|
struct kgsl_context *context;
|
||||||
struct adreno_context *adreno_context;
|
|
||||||
int next = 0;
|
int next = 0;
|
||||||
|
|
||||||
KGSL_DRV_ERR(device,
|
|
||||||
"Starting recovery from 3D GPU hang. Recovery parameters: IB1: 0x%X, "
|
|
||||||
"Bad context_id: %u, global_eop: 0x%x\n", rec_data->ib1,
|
|
||||||
rec_data->context_id, rec_data->global_eop);
|
|
||||||
|
|
||||||
/* Extract valid contents from rb which can still be executed after
|
|
||||||
* hang */
|
|
||||||
ret = adreno_ringbuffer_extract(rb, rec_data);
|
|
||||||
if (ret)
|
|
||||||
goto done;
|
|
||||||
|
|
||||||
context = idr_find(&device->context_idr, rec_data->context_id);
|
|
||||||
if (context == NULL) {
|
|
||||||
KGSL_DRV_ERR(device, "Last context unknown id:%d\n",
|
|
||||||
rec_data->context_id);
|
|
||||||
rec_data->context_id = KGSL_MEMSTORE_GLOBAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
timestamp = rb->timestamp[KGSL_MEMSTORE_GLOBAL];
|
|
||||||
KGSL_DRV_ERR(device, "Last issued global timestamp: %x\n", timestamp);
|
|
||||||
|
|
||||||
kgsl_sharedmem_readl(&device->memstore, &reftimestamp,
|
|
||||||
KGSL_MEMSTORE_OFFSET(rec_data->context_id,
|
|
||||||
ref_wait_ts));
|
|
||||||
kgsl_sharedmem_readl(&device->memstore, &enable_ts,
|
|
||||||
KGSL_MEMSTORE_OFFSET(rec_data->context_id,
|
|
||||||
ts_cmp_enable));
|
|
||||||
kgsl_sharedmem_readl(&device->memstore, &soptimestamp,
|
|
||||||
KGSL_MEMSTORE_OFFSET(rec_data->context_id,
|
|
||||||
soptimestamp));
|
|
||||||
kgsl_sharedmem_readl(&device->memstore, &eoptimestamp,
|
|
||||||
KGSL_MEMSTORE_OFFSET(rec_data->context_id,
|
|
||||||
eoptimestamp));
|
|
||||||
/* Make sure memory is synchronized before restarting the GPU */
|
|
||||||
mb();
|
|
||||||
KGSL_CTXT_ERR(device,
|
|
||||||
"Context id that caused a GPU hang: %d\n",
|
|
||||||
rec_data->context_id);
|
|
||||||
/* restart device */
|
|
||||||
ret = adreno_stop(device);
|
|
||||||
if (ret)
|
|
||||||
goto done;
|
|
||||||
ret = adreno_start(device, true);
|
|
||||||
if (ret)
|
|
||||||
goto done;
|
|
||||||
KGSL_DRV_ERR(device, "Device has been restarted after hang\n");
|
|
||||||
/* Restore timestamp states */
|
|
||||||
kgsl_sharedmem_writel(&device->memstore,
|
|
||||||
KGSL_MEMSTORE_OFFSET(rec_data->context_id,
|
|
||||||
soptimestamp), soptimestamp);
|
|
||||||
kgsl_sharedmem_writel(&device->memstore,
|
|
||||||
KGSL_MEMSTORE_OFFSET(rec_data->context_id,
|
|
||||||
eoptimestamp), eoptimestamp);
|
|
||||||
|
|
||||||
if (rec_data->rb_size) {
|
|
||||||
kgsl_sharedmem_writel(&device->memstore,
|
|
||||||
KGSL_MEMSTORE_OFFSET(rec_data->context_id,
|
|
||||||
ref_wait_ts), reftimestamp);
|
|
||||||
kgsl_sharedmem_writel(&device->memstore,
|
|
||||||
KGSL_MEMSTORE_OFFSET(rec_data->context_id,
|
|
||||||
ts_cmp_enable), enable_ts);
|
|
||||||
}
|
|
||||||
/* Make sure all writes are posted before the GPU reads them */
|
|
||||||
wmb();
|
|
||||||
/* Mark the invalid context so no more commands are accepted from
|
|
||||||
* that context */
|
|
||||||
|
|
||||||
adreno_context = context->devctxt;
|
|
||||||
|
|
||||||
KGSL_CTXT_ERR(device,
|
|
||||||
"Context that caused a GPU hang: %d\n", adreno_context->id);
|
|
||||||
|
|
||||||
adreno_context->flags |= CTXT_FLAGS_GPU_HANG;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set the reset status of all contexts to
|
* Set the reset status of all contexts to
|
||||||
* INNOCENT_CONTEXT_RESET_EXT except for the bad context
|
* INNOCENT_CONTEXT_RESET_EXT except for the bad context
|
||||||
* since that's the guilty party
|
* since that's the guilty party; if recovery failed then
|
||||||
|
* mark all as guilty
|
||||||
*/
|
*/
|
||||||
while ((context = idr_get_next(&device->context_idr, &next))) {
|
while ((context = idr_get_next(&device->context_idr, &next))) {
|
||||||
if (KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT !=
|
struct adreno_context *adreno_context = context->devctxt;
|
||||||
|
if (recovery_status) {
|
||||||
|
context->reset_status =
|
||||||
|
KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
|
||||||
|
adreno_context->flags |= CTXT_FLAGS_GPU_HANG;
|
||||||
|
} else if (KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT !=
|
||||||
context->reset_status) {
|
context->reset_status) {
|
||||||
if (context->id != rec_data->context_id)
|
if (adreno_context->flags & (CTXT_FLAGS_GPU_HANG ||
|
||||||
context->reset_status =
|
CTXT_FLAGS_GPU_HANG_RECOVERED))
|
||||||
KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT;
|
|
||||||
else
|
|
||||||
context->reset_status =
|
context->reset_status =
|
||||||
KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
|
KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
|
||||||
|
else
|
||||||
|
context->reset_status =
|
||||||
|
KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT;
|
||||||
}
|
}
|
||||||
next = next + 1;
|
next = next + 1;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Restore valid commands in ringbuffer */
|
static void adreno_set_max_ts_for_bad_ctxs(struct kgsl_device *device)
|
||||||
adreno_ringbuffer_restore(rb, rec_data->rb_buffer, rec_data->rb_size);
|
{
|
||||||
rb->timestamp[KGSL_MEMSTORE_GLOBAL] = timestamp;
|
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
|
||||||
/* wait for idle */
|
struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
|
||||||
ret = adreno_idle(device, KGSL_TIMEOUT_DEFAULT);
|
struct kgsl_context *context;
|
||||||
done:
|
struct adreno_context *temp_adreno_context;
|
||||||
return ret;
|
int next = 0;
|
||||||
|
|
||||||
|
while ((context = idr_get_next(&device->context_idr, &next))) {
|
||||||
|
temp_adreno_context = context->devctxt;
|
||||||
|
if (temp_adreno_context->flags & CTXT_FLAGS_GPU_HANG) {
|
||||||
|
kgsl_sharedmem_writel(&device->memstore,
|
||||||
|
KGSL_MEMSTORE_OFFSET(context->id,
|
||||||
|
soptimestamp),
|
||||||
|
rb->timestamp[context->id]);
|
||||||
|
kgsl_sharedmem_writel(&device->memstore,
|
||||||
|
KGSL_MEMSTORE_OFFSET(context->id,
|
||||||
|
eoptimestamp),
|
||||||
|
rb->timestamp[context->id]);
|
||||||
|
}
|
||||||
|
next = next + 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void adreno_destroy_recovery_data(struct adreno_recovery_data *rec_data)
|
static void adreno_destroy_recovery_data(struct adreno_recovery_data *rec_data)
|
||||||
|
@ -966,7 +904,179 @@ done:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int adreno_dump_and_recover(struct kgsl_device *device)
|
static int
|
||||||
|
_adreno_recover_hang(struct kgsl_device *device,
|
||||||
|
struct adreno_recovery_data *rec_data,
|
||||||
|
bool try_bad_commands)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
|
||||||
|
struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
|
||||||
|
struct kgsl_context *context;
|
||||||
|
struct adreno_context *adreno_context = NULL;
|
||||||
|
struct adreno_context *last_active_ctx = adreno_dev->drawctxt_active;
|
||||||
|
|
||||||
|
context = idr_find(&device->context_idr, rec_data->context_id);
|
||||||
|
if (context == NULL) {
|
||||||
|
KGSL_DRV_ERR(device, "Last context unknown id:%d\n",
|
||||||
|
rec_data->context_id);
|
||||||
|
} else {
|
||||||
|
adreno_context = context->devctxt;
|
||||||
|
adreno_context->flags |= CTXT_FLAGS_GPU_HANG;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Extract valid contents from rb which can still be executed after
|
||||||
|
* hang */
|
||||||
|
ret = adreno_ringbuffer_extract(rb, rec_data);
|
||||||
|
if (ret)
|
||||||
|
goto done;
|
||||||
|
|
||||||
|
/* restart device */
|
||||||
|
ret = adreno_stop(device);
|
||||||
|
if (ret) {
|
||||||
|
KGSL_DRV_ERR(device, "Device stop failed in recovery\n");
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = adreno_start(device, true);
|
||||||
|
if (ret) {
|
||||||
|
KGSL_DRV_ERR(device, "Device start failed in recovery\n");
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (context)
|
||||||
|
kgsl_mmu_setstate(&device->mmu, adreno_context->pagetable,
|
||||||
|
KGSL_MEMSTORE_GLOBAL);
|
||||||
|
|
||||||
|
/* Do not try the bad commands if replaying the bad commands has failed
|
||||||
|
* once already */
|
||||||
|
if (!try_bad_commands)
|
||||||
|
rec_data->bad_rb_size = 0;
|
||||||
|
|
||||||
|
if (rec_data->bad_rb_size) {
|
||||||
|
int idle_ret;
|
||||||
|
/* submit the bad and good context commands and wait for
|
||||||
|
* them to pass */
|
||||||
|
adreno_ringbuffer_restore(rb, rec_data->bad_rb_buffer,
|
||||||
|
rec_data->bad_rb_size);
|
||||||
|
idle_ret = adreno_idle(device, KGSL_TIMEOUT_DEFAULT);
|
||||||
|
if (idle_ret) {
|
||||||
|
ret = adreno_stop(device);
|
||||||
|
if (ret) {
|
||||||
|
KGSL_DRV_ERR(device,
|
||||||
|
"Device stop failed in recovery\n");
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
ret = adreno_start(device, true);
|
||||||
|
if (ret) {
|
||||||
|
KGSL_DRV_ERR(device,
|
||||||
|
"Device start failed in recovery\n");
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
ret = idle_ret;
|
||||||
|
KGSL_DRV_ERR(device,
|
||||||
|
"Bad context commands hung in recovery\n");
|
||||||
|
} else {
|
||||||
|
KGSL_DRV_ERR(device,
|
||||||
|
"Bad context commands succeeded in recovery\n");
|
||||||
|
if (adreno_context)
|
||||||
|
adreno_context->flags = (adreno_context->flags &
|
||||||
|
~CTXT_FLAGS_GPU_HANG) |
|
||||||
|
CTXT_FLAGS_GPU_HANG_RECOVERED;
|
||||||
|
adreno_dev->drawctxt_active = last_active_ctx;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* If either the bad command sequence failed or we did not play it */
|
||||||
|
if (ret || !rec_data->bad_rb_size) {
|
||||||
|
adreno_ringbuffer_restore(rb, rec_data->rb_buffer,
|
||||||
|
rec_data->rb_size);
|
||||||
|
ret = adreno_idle(device, KGSL_TIMEOUT_DEFAULT);
|
||||||
|
if (ret) {
|
||||||
|
/* If we fail here we can try to invalidate another
|
||||||
|
* context and try recovering again */
|
||||||
|
ret = -EAGAIN;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
/* ringbuffer now has data from the last valid context id,
|
||||||
|
* so restore the active_ctx to the last valid context */
|
||||||
|
if (rec_data->last_valid_ctx_id) {
|
||||||
|
struct kgsl_context *last_ctx =
|
||||||
|
idr_find(&device->context_idr,
|
||||||
|
rec_data->last_valid_ctx_id);
|
||||||
|
if (last_ctx)
|
||||||
|
adreno_dev->drawctxt_active = last_ctx->devctxt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
done:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
adreno_recover_hang(struct kgsl_device *device,
|
||||||
|
struct adreno_recovery_data *rec_data)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
|
||||||
|
struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
|
||||||
|
unsigned int timestamp;
|
||||||
|
|
||||||
|
KGSL_DRV_ERR(device,
|
||||||
|
"Starting recovery from 3D GPU hang. Recovery parameters: IB1: 0x%X, "
|
||||||
|
"Bad context_id: %u, global_eop: 0x%x\n",
|
||||||
|
rec_data->ib1, rec_data->context_id, rec_data->global_eop);
|
||||||
|
|
||||||
|
timestamp = rb->timestamp[KGSL_MEMSTORE_GLOBAL];
|
||||||
|
KGSL_DRV_ERR(device, "Last issued global timestamp: %x\n", timestamp);
|
||||||
|
|
||||||
|
/* We may need to replay commands multiple times based on whether
|
||||||
|
* multiple contexts hang the GPU */
|
||||||
|
while (true) {
|
||||||
|
if (!ret)
|
||||||
|
ret = _adreno_recover_hang(device, rec_data, true);
|
||||||
|
else
|
||||||
|
ret = _adreno_recover_hang(device, rec_data, false);
|
||||||
|
|
||||||
|
if (-EAGAIN == ret) {
|
||||||
|
/* setup new recovery parameters and retry, this
|
||||||
|
* means more than one context is causing the hang */
|
||||||
|
adreno_destroy_recovery_data(rec_data);
|
||||||
|
adreno_setup_recovery_data(device, rec_data);
|
||||||
|
KGSL_DRV_ERR(device,
|
||||||
|
"Retry recovery from 3D GPU hang. Recovery parameters: "
|
||||||
|
"IB1: 0x%X, Bad context_id: %u, global_eop: 0x%x\n",
|
||||||
|
rec_data->ib1, rec_data->context_id,
|
||||||
|
rec_data->global_eop);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret)
|
||||||
|
goto done;
|
||||||
|
|
||||||
|
/* Restore correct states after recovery */
|
||||||
|
if (adreno_dev->drawctxt_active)
|
||||||
|
device->mmu.hwpagetable =
|
||||||
|
adreno_dev->drawctxt_active->pagetable;
|
||||||
|
else
|
||||||
|
device->mmu.hwpagetable = device->mmu.defaultpagetable;
|
||||||
|
rb->timestamp[KGSL_MEMSTORE_GLOBAL] = timestamp;
|
||||||
|
kgsl_sharedmem_writel(&device->memstore,
|
||||||
|
KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
|
||||||
|
eoptimestamp),
|
||||||
|
rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
|
||||||
|
done:
|
||||||
|
adreno_set_max_ts_for_bad_ctxs(device);
|
||||||
|
adreno_mark_context_status(device, ret);
|
||||||
|
if (!ret)
|
||||||
|
KGSL_DRV_ERR(device, "Recovery succeeded\n");
|
||||||
|
else
|
||||||
|
KGSL_DRV_ERR(device, "Recovery failed\n");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
adreno_dump_and_recover(struct kgsl_device *device)
|
||||||
{
|
{
|
||||||
int result = -ETIMEDOUT;
|
int result = -ETIMEDOUT;
|
||||||
struct adreno_recovery_data rec_data;
|
struct adreno_recovery_data rec_data;
|
||||||
|
|
|
@ -44,6 +44,8 @@
|
||||||
#define CTXT_FLAGS_TRASHSTATE 0x00020000
|
#define CTXT_FLAGS_TRASHSTATE 0x00020000
|
||||||
/* per context timestamps enabled */
|
/* per context timestamps enabled */
|
||||||
#define CTXT_FLAGS_PER_CONTEXT_TS 0x00040000
|
#define CTXT_FLAGS_PER_CONTEXT_TS 0x00040000
|
||||||
|
/* Context has caused a GPU hang and recovered properly */
|
||||||
|
#define CTXT_FLAGS_GPU_HANG_RECOVERED 0x00008000
|
||||||
|
|
||||||
struct kgsl_device;
|
struct kgsl_device;
|
||||||
struct adreno_device;
|
struct adreno_device;
|
||||||
|
|
|
@ -942,177 +942,347 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv,
|
||||||
*/
|
*/
|
||||||
adreno_idle(device, KGSL_TIMEOUT_DEFAULT);
|
adreno_idle(device, KGSL_TIMEOUT_DEFAULT);
|
||||||
#endif
|
#endif
|
||||||
|
/* If context hung and recovered then return error so that the
|
||||||
|
* application may handle it */
|
||||||
|
if (drawctxt->flags & CTXT_FLAGS_GPU_HANG_RECOVERED)
|
||||||
|
return -EDEADLK;
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
|
||||||
return 0;
|
}
|
||||||
|
|
||||||
|
static int _find_start_of_cmd_seq(struct adreno_ringbuffer *rb,
|
||||||
|
unsigned int *ptr,
|
||||||
|
bool inc)
|
||||||
|
{
|
||||||
|
int status = -EINVAL;
|
||||||
|
unsigned int val1;
|
||||||
|
unsigned int size = rb->buffer_desc.size;
|
||||||
|
unsigned int start_ptr = *ptr;
|
||||||
|
|
||||||
|
while ((start_ptr / sizeof(unsigned int)) != rb->wptr) {
|
||||||
|
if (inc)
|
||||||
|
start_ptr = adreno_ringbuffer_inc_wrapped(start_ptr,
|
||||||
|
size);
|
||||||
|
else
|
||||||
|
start_ptr = adreno_ringbuffer_dec_wrapped(start_ptr,
|
||||||
|
size);
|
||||||
|
kgsl_sharedmem_readl(&rb->buffer_desc, &val1, start_ptr);
|
||||||
|
if (KGSL_CMD_IDENTIFIER == val1) {
|
||||||
|
if ((start_ptr / sizeof(unsigned int)) != rb->wptr)
|
||||||
|
start_ptr = adreno_ringbuffer_dec_wrapped(
|
||||||
|
start_ptr, size);
|
||||||
|
*ptr = start_ptr;
|
||||||
|
status = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int _find_cmd_seq_after_eop_ts(struct adreno_ringbuffer *rb,
|
||||||
|
unsigned int *rb_rptr,
|
||||||
|
unsigned int global_eop,
|
||||||
|
bool inc)
|
||||||
|
{
|
||||||
|
int status = -EINVAL;
|
||||||
|
unsigned int temp_rb_rptr = *rb_rptr;
|
||||||
|
unsigned int size = rb->buffer_desc.size;
|
||||||
|
unsigned int val[3];
|
||||||
|
int i = 0;
|
||||||
|
bool check = false;
|
||||||
|
|
||||||
|
if (inc && temp_rb_rptr / sizeof(unsigned int) != rb->wptr)
|
||||||
|
return status;
|
||||||
|
|
||||||
|
do {
|
||||||
|
/* when decrementing we need to decrement first and
|
||||||
|
* then read, to make sure we cover all the data */
|
||||||
|
if (!inc)
|
||||||
|
temp_rb_rptr = adreno_ringbuffer_dec_wrapped(
|
||||||
|
temp_rb_rptr, size);
|
||||||
|
kgsl_sharedmem_readl(&rb->buffer_desc, &val[i],
|
||||||
|
temp_rb_rptr);
|
||||||
|
|
||||||
|
if (check && ((inc && val[i] == global_eop) ||
|
||||||
|
(!inc && (val[i] ==
|
||||||
|
cp_type3_packet(CP_MEM_WRITE, 2) ||
|
||||||
|
val[i] == CACHE_FLUSH_TS)))) {
|
||||||
|
/* decrement i, i.e i = (i - 1 + 3) % 3 if
|
||||||
|
* we are going forward, else increment i */
|
||||||
|
i = (i + 2) % 3;
|
||||||
|
if (val[i] == rb->device->memstore.gpuaddr +
|
||||||
|
KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
|
||||||
|
eoptimestamp)) {
|
||||||
|
int j = ((i + 2) % 3);
|
||||||
|
if ((inc && (val[j] == CACHE_FLUSH_TS ||
|
||||||
|
val[j] == cp_type3_packet(
|
||||||
|
CP_MEM_WRITE, 2))) ||
|
||||||
|
(!inc && val[j] == global_eop)) {
|
||||||
|
/* Found the global eop */
|
||||||
|
status = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* if no match found then increment i again
|
||||||
|
* since we decremented before matching */
|
||||||
|
i = (i + 1) % 3;
|
||||||
|
}
|
||||||
|
if (inc)
|
||||||
|
temp_rb_rptr = adreno_ringbuffer_inc_wrapped(
|
||||||
|
temp_rb_rptr, size);
|
||||||
|
|
||||||
|
i = (i + 1) % 3;
|
||||||
|
if (2 == i)
|
||||||
|
check = true;
|
||||||
|
} while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr);
|
||||||
|
/* temp_rb_rptr points to the command stream after global eop,
|
||||||
|
* move backward till the start of command sequence */
|
||||||
|
if (!status) {
|
||||||
|
status = _find_start_of_cmd_seq(rb, &temp_rb_rptr, false);
|
||||||
|
if (!status) {
|
||||||
|
*rb_rptr = temp_rb_rptr;
|
||||||
|
KGSL_DRV_ERR(rb->device,
|
||||||
|
"Offset of cmd sequence after eop timestamp: 0x%x\n",
|
||||||
|
temp_rb_rptr / sizeof(unsigned int));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (status)
|
||||||
|
KGSL_DRV_ERR(rb->device,
|
||||||
|
"Failed to find the command sequence after eop timestamp\n");
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int _find_hanging_ib_sequence(struct adreno_ringbuffer *rb,
|
||||||
|
unsigned int *rb_rptr,
|
||||||
|
unsigned int ib1)
|
||||||
|
{
|
||||||
|
int status = -EINVAL;
|
||||||
|
unsigned int temp_rb_rptr = *rb_rptr;
|
||||||
|
unsigned int size = rb->buffer_desc.size;
|
||||||
|
unsigned int val[2];
|
||||||
|
int i = 0;
|
||||||
|
bool check = false;
|
||||||
|
bool ctx_switch = false;
|
||||||
|
|
||||||
|
while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr) {
|
||||||
|
kgsl_sharedmem_readl(&rb->buffer_desc, &val[i], temp_rb_rptr);
|
||||||
|
|
||||||
|
if (check && val[i] == ib1) {
|
||||||
|
/* decrement i, i.e i = (i - 1 + 2) % 2 */
|
||||||
|
i = (i + 1) % 2;
|
||||||
|
if (adreno_cmd_is_ib(val[i])) {
|
||||||
|
/* go till start of command sequence */
|
||||||
|
status = _find_start_of_cmd_seq(rb,
|
||||||
|
&temp_rb_rptr, false);
|
||||||
|
KGSL_DRV_ERR(rb->device,
|
||||||
|
"Found the hanging IB at offset 0x%x\n",
|
||||||
|
temp_rb_rptr / sizeof(unsigned int));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* if no match then increment i since we decremented
|
||||||
|
* before checking */
|
||||||
|
i = (i + 1) % 2;
|
||||||
|
}
|
||||||
|
/* Make sure you do not encounter a context switch twice, we can
|
||||||
|
* encounter it once for the bad context as the start of search
|
||||||
|
* can point to the context switch */
|
||||||
|
if (val[i] == KGSL_CONTEXT_TO_MEM_IDENTIFIER) {
|
||||||
|
if (ctx_switch) {
|
||||||
|
KGSL_DRV_ERR(rb->device,
|
||||||
|
"Context switch encountered before bad "
|
||||||
|
"IB found\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
ctx_switch = true;
|
||||||
|
}
|
||||||
|
i = (i + 1) % 2;
|
||||||
|
if (1 == i)
|
||||||
|
check = true;
|
||||||
|
temp_rb_rptr = adreno_ringbuffer_inc_wrapped(temp_rb_rptr,
|
||||||
|
size);
|
||||||
|
}
|
||||||
|
if (!status)
|
||||||
|
*rb_rptr = temp_rb_rptr;
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void _turn_preamble_on_for_ib_seq(struct adreno_ringbuffer *rb,
|
||||||
|
unsigned int rb_rptr)
|
||||||
|
{
|
||||||
|
unsigned int temp_rb_rptr = rb_rptr;
|
||||||
|
unsigned int size = rb->buffer_desc.size;
|
||||||
|
unsigned int val[2];
|
||||||
|
int i = 0;
|
||||||
|
bool check = false;
|
||||||
|
bool cmd_start = false;
|
||||||
|
|
||||||
|
/* Go till the start of the ib sequence and turn on preamble */
|
||||||
|
while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr) {
|
||||||
|
kgsl_sharedmem_readl(&rb->buffer_desc, &val[i], temp_rb_rptr);
|
||||||
|
if (check && KGSL_START_OF_IB_IDENTIFIER == val[i]) {
|
||||||
|
/* decrement i */
|
||||||
|
i = (i + 1) % 2;
|
||||||
|
if (val[i] == cp_nop_packet(4)) {
|
||||||
|
temp_rb_rptr = adreno_ringbuffer_dec_wrapped(
|
||||||
|
temp_rb_rptr, size);
|
||||||
|
kgsl_sharedmem_writel(&rb->buffer_desc,
|
||||||
|
temp_rb_rptr, cp_nop_packet(1));
|
||||||
|
}
|
||||||
|
KGSL_DRV_ERR(rb->device,
|
||||||
|
"Turned preamble on at offset 0x%x\n",
|
||||||
|
temp_rb_rptr / 4);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* If you reach beginning of next command sequence then exit
|
||||||
|
* First command encountered is the current one so don't break
|
||||||
|
* on that. */
|
||||||
|
if (KGSL_CMD_IDENTIFIER == val[i]) {
|
||||||
|
if (cmd_start)
|
||||||
|
break;
|
||||||
|
cmd_start = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
i = (i + 1) % 2;
|
||||||
|
if (1 == i)
|
||||||
|
check = true;
|
||||||
|
temp_rb_rptr = adreno_ringbuffer_inc_wrapped(temp_rb_rptr,
|
||||||
|
size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void _copy_valid_rb_content(struct adreno_ringbuffer *rb,
|
||||||
|
unsigned int rb_rptr, unsigned int *temp_rb_buffer,
|
||||||
|
int *rb_size, unsigned int *bad_rb_buffer,
|
||||||
|
int *bad_rb_size,
|
||||||
|
int *last_valid_ctx_id)
|
||||||
|
{
|
||||||
|
unsigned int good_rb_idx = 0, cmd_start_idx = 0;
|
||||||
|
unsigned int val1 = 0;
|
||||||
|
struct kgsl_context *k_ctxt;
|
||||||
|
struct adreno_context *a_ctxt;
|
||||||
|
unsigned int bad_rb_idx = 0;
|
||||||
|
int copy_rb_contents = 0;
|
||||||
|
unsigned int temp_rb_rptr;
|
||||||
|
unsigned int size = rb->buffer_desc.size;
|
||||||
|
unsigned int good_cmd_start_idx = 0;
|
||||||
|
|
||||||
|
/* Walk the rb from the context switch. Omit any commands
|
||||||
|
* for an invalid context. */
|
||||||
|
while ((rb_rptr / sizeof(unsigned int)) != rb->wptr) {
|
||||||
|
kgsl_sharedmem_readl(&rb->buffer_desc, &val1, rb_rptr);
|
||||||
|
|
||||||
|
if (KGSL_CMD_IDENTIFIER == val1) {
|
||||||
|
/* Start is the NOP dword that comes before
|
||||||
|
* KGSL_CMD_IDENTIFIER */
|
||||||
|
cmd_start_idx = bad_rb_idx - 1;
|
||||||
|
if (copy_rb_contents)
|
||||||
|
good_cmd_start_idx = good_rb_idx - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check for context switch indicator */
|
||||||
|
if (val1 == KGSL_CONTEXT_TO_MEM_IDENTIFIER) {
|
||||||
|
unsigned int temp_idx, val2;
|
||||||
|
/* increment by 3 to get to the context_id */
|
||||||
|
temp_rb_rptr = rb_rptr + (3 * sizeof(unsigned int)) %
|
||||||
|
size;
|
||||||
|
kgsl_sharedmem_readl(&rb->buffer_desc, &val2,
|
||||||
|
temp_rb_rptr);
|
||||||
|
|
||||||
|
/* if context switches to a context that did not cause
|
||||||
|
* hang then start saving the rb contents as those
|
||||||
|
* commands can be executed */
|
||||||
|
k_ctxt = idr_find(&rb->device->context_idr, val2);
|
||||||
|
if (k_ctxt) {
|
||||||
|
a_ctxt = k_ctxt->devctxt;
|
||||||
|
|
||||||
|
/* If we are changing to a good context and were not
|
||||||
|
* copying commands then copy over commands to the good
|
||||||
|
* context */
|
||||||
|
if (!copy_rb_contents && ((k_ctxt &&
|
||||||
|
!(a_ctxt->flags & CTXT_FLAGS_GPU_HANG)) ||
|
||||||
|
!k_ctxt)) {
|
||||||
|
for (temp_idx = cmd_start_idx;
|
||||||
|
temp_idx < bad_rb_idx;
|
||||||
|
temp_idx++)
|
||||||
|
temp_rb_buffer[good_rb_idx++] =
|
||||||
|
bad_rb_buffer[temp_idx];
|
||||||
|
*last_valid_ctx_id = val2;
|
||||||
|
copy_rb_contents = 1;
|
||||||
|
} else if (copy_rb_contents && k_ctxt &&
|
||||||
|
(a_ctxt->flags & CTXT_FLAGS_GPU_HANG)) {
|
||||||
|
/* If we are changing to bad context then remove
|
||||||
|
* the dwords we copied for this sequence from
|
||||||
|
* the good buffer */
|
||||||
|
good_rb_idx = good_cmd_start_idx;
|
||||||
|
copy_rb_contents = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (copy_rb_contents)
|
||||||
|
temp_rb_buffer[good_rb_idx++] = val1;
|
||||||
|
/* Copy both good and bad commands for replay to the bad
|
||||||
|
* buffer */
|
||||||
|
bad_rb_buffer[bad_rb_idx++] = val1;
|
||||||
|
|
||||||
|
rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, size);
|
||||||
|
}
|
||||||
|
*rb_size = good_rb_idx;
|
||||||
|
*bad_rb_size = bad_rb_idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb,
|
int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb,
|
||||||
struct adreno_recovery_data *rec_data)
|
struct adreno_recovery_data *rec_data)
|
||||||
{
|
{
|
||||||
|
int status;
|
||||||
struct kgsl_device *device = rb->device;
|
struct kgsl_device *device = rb->device;
|
||||||
unsigned int rb_rptr = rb->wptr * sizeof(unsigned int);
|
unsigned int rb_rptr = rb->wptr * sizeof(unsigned int);
|
||||||
unsigned int temp_idx = 0;
|
|
||||||
unsigned int value;
|
|
||||||
unsigned int val1;
|
|
||||||
unsigned int val2;
|
|
||||||
unsigned int val3;
|
|
||||||
unsigned int copy_rb_contents = 0;
|
|
||||||
struct kgsl_context *context;
|
struct kgsl_context *context;
|
||||||
unsigned int *temp_rb_buffer = rec_data->rb_buffer;
|
struct adreno_context *adreno_context;
|
||||||
|
|
||||||
KGSL_DRV_ERR(device, "Last context id: %d\n", rec_data->context_id);
|
|
||||||
context = idr_find(&device->context_idr, rec_data->context_id);
|
context = idr_find(&device->context_idr, rec_data->context_id);
|
||||||
if (context == NULL) {
|
|
||||||
KGSL_DRV_ERR(device,
|
/* Look for the command stream that is right after the global eop */
|
||||||
"GPU recovery from hang not possible because last"
|
status = _find_cmd_seq_after_eop_ts(rb, &rb_rptr,
|
||||||
" context id is invalid.\n");
|
rec_data->global_eop + 1, false);
|
||||||
return -EINVAL;
|
if (status)
|
||||||
}
|
goto done;
|
||||||
KGSL_DRV_ERR(device, "GPU successfully executed till ts: %x\n",
|
|
||||||
rec_data->global_eop);
|
if (context) {
|
||||||
/*
|
adreno_context = context->devctxt;
|
||||||
* We need to go back in history by 4 dwords from the current location
|
|
||||||
* of read pointer as 4 dwords are read to match the end of a command.
|
if (adreno_context->flags & CTXT_FLAGS_PREAMBLE) {
|
||||||
* Also, take care of wrap around when moving back
|
if (rec_data->ib1) {
|
||||||
*/
|
status = _find_hanging_ib_sequence(rb, &rb_rptr,
|
||||||
if (rb->rptr >= 4)
|
rec_data->ib1);
|
||||||
rb_rptr = (rb->rptr - 4) * sizeof(unsigned int);
|
if (status)
|
||||||
else
|
goto copy_rb_contents;
|
||||||
rb_rptr = rb->buffer_desc.size -
|
|
||||||
((4 - rb->rptr) * sizeof(unsigned int));
|
|
||||||
/* Read the rb contents going backwards to locate end of last
|
|
||||||
* successfully executed command */
|
|
||||||
while ((rb_rptr / sizeof(unsigned int)) != rb->wptr) {
|
|
||||||
kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr);
|
|
||||||
if (value == rec_data->global_eop) {
|
|
||||||
rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr,
|
|
||||||
rb->buffer_desc.size);
|
|
||||||
kgsl_sharedmem_readl(&rb->buffer_desc, &val1, rb_rptr);
|
|
||||||
rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr,
|
|
||||||
rb->buffer_desc.size);
|
|
||||||
kgsl_sharedmem_readl(&rb->buffer_desc, &val2, rb_rptr);
|
|
||||||
rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr,
|
|
||||||
rb->buffer_desc.size);
|
|
||||||
kgsl_sharedmem_readl(&rb->buffer_desc, &val3, rb_rptr);
|
|
||||||
/* match the pattern found at the end of a command */
|
|
||||||
if ((val1 == 2 &&
|
|
||||||
val2 == cp_type3_packet(CP_INTERRUPT, 1)
|
|
||||||
&& val3 == CP_INT_CNTL__RB_INT_MASK) ||
|
|
||||||
(val1 == cp_type3_packet(CP_EVENT_WRITE, 3)
|
|
||||||
&& val2 == CACHE_FLUSH_TS &&
|
|
||||||
val3 == (rb->device->memstore.gpuaddr +
|
|
||||||
KGSL_MEMSTORE_OFFSET(rec_data->context_id,
|
|
||||||
eoptimestamp)))) {
|
|
||||||
rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr,
|
|
||||||
rb->buffer_desc.size);
|
|
||||||
KGSL_DRV_ERR(device,
|
|
||||||
"Found end of last executed "
|
|
||||||
"command at offset: %x\n",
|
|
||||||
rb_rptr / sizeof(unsigned int));
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
if (rb_rptr < (3 * sizeof(unsigned int)))
|
|
||||||
rb_rptr = rb->buffer_desc.size -
|
|
||||||
(3 * sizeof(unsigned int))
|
|
||||||
+ rb_rptr;
|
|
||||||
else
|
|
||||||
rb_rptr -= (3 * sizeof(unsigned int));
|
|
||||||
}
|
}
|
||||||
|
_turn_preamble_on_for_ib_seq(rb, rb_rptr);
|
||||||
|
} else {
|
||||||
|
status = -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rb_rptr == 0)
|
|
||||||
rb_rptr = rb->buffer_desc.size - sizeof(unsigned int);
|
|
||||||
else
|
|
||||||
rb_rptr -= sizeof(unsigned int);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((rb_rptr / sizeof(unsigned int)) == rb->wptr) {
|
copy_rb_contents:
|
||||||
KGSL_DRV_ERR(device,
|
_copy_valid_rb_content(rb, rb_rptr, rec_data->rb_buffer,
|
||||||
"GPU recovery from hang not possible because last"
|
&rec_data->rb_size,
|
||||||
" successful timestamp is overwritten\n");
|
rec_data->bad_rb_buffer,
|
||||||
return -EINVAL;
|
&rec_data->bad_rb_size,
|
||||||
|
&rec_data->last_valid_ctx_id);
|
||||||
|
/* If we failed to get the hanging IB sequence then we cannot execute
|
||||||
|
* commands from the bad context, or preambles are not supported */
|
||||||
|
if (status) {
|
||||||
|
rec_data->bad_rb_size = 0;
|
||||||
|
status = 0;
|
||||||
}
|
}
|
||||||
/* rb_rptr is now pointing to the first dword of the command following
|
/* If there is no context then that means there are no commands for
|
||||||
* the last successfully executed command sequence. Assumption is that
|
* good case */
|
||||||
* GPU is hung in the command sequence pointed by rb_rptr */
|
if (!context)
|
||||||
/* make sure the GPU is not hung in a command submitted by kgsl
|
rec_data->rb_size = 0;
|
||||||
* itself */
|
done:
|
||||||
kgsl_sharedmem_readl(&rb->buffer_desc, &val1, rb_rptr);
|
return status;
|
||||||
kgsl_sharedmem_readl(&rb->buffer_desc, &val2,
|
|
||||||
adreno_ringbuffer_inc_wrapped(rb_rptr,
|
|
||||||
rb->buffer_desc.size));
|
|
||||||
if (val1 == cp_nop_packet(1) && val2 == KGSL_CMD_IDENTIFIER) {
|
|
||||||
KGSL_DRV_ERR(device,
|
|
||||||
"GPU recovery from hang not possible because "
|
|
||||||
"of hang in kgsl command\n");
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
while ((rb_rptr / sizeof(unsigned int)) != rb->wptr) {
|
|
||||||
kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr);
|
|
||||||
rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr,
|
|
||||||
rb->buffer_desc.size);
|
|
||||||
/* check for context switch indicator */
|
|
||||||
if (value == KGSL_CONTEXT_TO_MEM_IDENTIFIER) {
|
|
||||||
kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr);
|
|
||||||
rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr,
|
|
||||||
rb->buffer_desc.size);
|
|
||||||
BUG_ON(value != cp_type3_packet(CP_MEM_WRITE, 2));
|
|
||||||
kgsl_sharedmem_readl(&rb->buffer_desc, &val1, rb_rptr);
|
|
||||||
rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr,
|
|
||||||
rb->buffer_desc.size);
|
|
||||||
BUG_ON(val1 != (device->memstore.gpuaddr +
|
|
||||||
KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
|
|
||||||
current_context)));
|
|
||||||
kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr);
|
|
||||||
rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr,
|
|
||||||
rb->buffer_desc.size);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If other context switches were already lost and
|
|
||||||
* the current context is the one that is hanging,
|
|
||||||
* then we cannot recover. Print an error message
|
|
||||||
* and leave.
|
|
||||||
*/
|
|
||||||
|
|
||||||
if ((copy_rb_contents == 0) && (value ==
|
|
||||||
rec_data->context_id)) {
|
|
||||||
KGSL_DRV_ERR(device, "GPU recovery could not "
|
|
||||||
"find the previous context\n");
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If we were copying the commands and got to this point
|
|
||||||
* then we need to remove the 3 commands that appear
|
|
||||||
* before KGSL_CONTEXT_TO_MEM_IDENTIFIER
|
|
||||||
*/
|
|
||||||
if (temp_idx)
|
|
||||||
temp_idx -= 3;
|
|
||||||
/* if context switches to a context that did not cause
|
|
||||||
* hang then start saving the rb contents as those
|
|
||||||
* commands can be executed */
|
|
||||||
if (value != rec_data->context_id) {
|
|
||||||
copy_rb_contents = 1;
|
|
||||||
temp_rb_buffer[temp_idx++] = cp_nop_packet(1);
|
|
||||||
temp_rb_buffer[temp_idx++] =
|
|
||||||
KGSL_CMD_IDENTIFIER;
|
|
||||||
temp_rb_buffer[temp_idx++] = cp_nop_packet(1);
|
|
||||||
temp_rb_buffer[temp_idx++] =
|
|
||||||
KGSL_CONTEXT_TO_MEM_IDENTIFIER;
|
|
||||||
temp_rb_buffer[temp_idx++] =
|
|
||||||
cp_type3_packet(CP_MEM_WRITE, 2);
|
|
||||||
temp_rb_buffer[temp_idx++] = val1;
|
|
||||||
temp_rb_buffer[temp_idx++] = value;
|
|
||||||
} else {
|
|
||||||
copy_rb_contents = 0;
|
|
||||||
}
|
|
||||||
} else if (copy_rb_contents)
|
|
||||||
temp_rb_buffer[temp_idx++] = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
rec_data->rb_size = temp_idx;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -139,4 +139,11 @@ static inline unsigned int adreno_ringbuffer_inc_wrapped(unsigned int val,
|
||||||
return (val + sizeof(unsigned int)) % size;
|
return (val + sizeof(unsigned int)) % size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Decrement a value by 4 bytes with wrap-around based on size */
|
||||||
|
static inline unsigned int adreno_ringbuffer_dec_wrapped(unsigned int val,
|
||||||
|
unsigned int size)
|
||||||
|
{
|
||||||
|
return (val + size - sizeof(unsigned int)) % size;
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* __ADRENO_RINGBUFFER_H */
|
#endif /* __ADRENO_RINGBUFFER_H */
|
||||||
|
|
Loading…
Reference in New Issue