@@ -38,8 +38,10 @@
#define VCE_IDLE_TIMEOUT_MS 1000 |
|
/* Firmware Names */ |
#define FIRMWARE_TAHITI "radeon/TAHITI_vce.bin" |
#define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin" |
|
MODULE_FIRMWARE(FIRMWARE_TAHITI); |
MODULE_FIRMWARE(FIRMWARE_BONAIRE); |
|
static void radeon_vce_idle_work_handler(struct work_struct *work); |
@@ -63,6 +65,14 @@
INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler); |
|
switch (rdev->family) { |
case CHIP_TAHITI: |
case CHIP_PITCAIRN: |
case CHIP_VERDE: |
case CHIP_OLAND: |
case CHIP_ARUBA: |
fw_name = FIRMWARE_TAHITI; |
break; |
|
case CHIP_BONAIRE: |
case CHIP_KAVERI: |
case CHIP_KABINI: |
@@ -118,13 +128,17 @@
rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8); |
|
/* we can only work with this fw version for now */ |
if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) |
if ((rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) && |
(rdev->vce.fw_version != ((50 << 24) | (0 << 16) | (1 << 8))) && |
(rdev->vce.fw_version != ((50 << 24) | (1 << 16) | (2 << 8)))) |
return -EINVAL; |
|
/* allocate firmware, stack and heap BO */ |
|
size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + |
RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; |
if (rdev->family < CHIP_BONAIRE) |
size = vce_v1_0_bo_size(rdev); |
else |
size = vce_v2_0_bo_size(rdev); |
r = radeon_bo_create(rdev, size, PAGE_SIZE, true, |
RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, |
&rdev->vce.vcpu_bo); |
@@ -225,6 +239,10 @@
return r; |
} |
|
memset(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo)); |
if (rdev->family < CHIP_BONAIRE) |
r = vce_v1_0_load_fw(rdev, cpu_addr); |
else |
memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); |
|
radeon_bo_kunmap(rdev->vce.vcpu_bo); |
@@ -231,7 +249,7 @@
|
radeon_bo_unreserve(rdev->vce.vcpu_bo); |
|
return 0; |
return r; |
} |
|
/** |
@@ -343,31 +361,31 @@
|
/* stitch together an VCE create msg */ |
ib.length_dw = 0; |
ib.ptr[ib.length_dw++] = 0x0000000c; /* len */ |
ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */ |
ib.ptr[ib.length_dw++] = handle; |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c); /* len */ |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001); /* session cmd */ |
ib.ptr[ib.length_dw++] = cpu_to_le32(handle); |
|
ib.ptr[ib.length_dw++] = 0x00000030; /* len */ |
ib.ptr[ib.length_dw++] = 0x01000001; /* create cmd */ |
ib.ptr[ib.length_dw++] = 0x00000000; |
ib.ptr[ib.length_dw++] = 0x00000042; |
ib.ptr[ib.length_dw++] = 0x0000000a; |
ib.ptr[ib.length_dw++] = 0x00000001; |
ib.ptr[ib.length_dw++] = 0x00000080; |
ib.ptr[ib.length_dw++] = 0x00000060; |
ib.ptr[ib.length_dw++] = 0x00000100; |
ib.ptr[ib.length_dw++] = 0x00000100; |
ib.ptr[ib.length_dw++] = 0x0000000c; |
ib.ptr[ib.length_dw++] = 0x00000000; |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000030); /* len */ |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x01000001); /* create cmd */ |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000000); |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000042); |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000a); |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001); |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000080); |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000060); |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000100); |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000100); |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c); |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000000); |
|
ib.ptr[ib.length_dw++] = 0x00000014; /* len */ |
ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */ |
ib.ptr[ib.length_dw++] = upper_32_bits(dummy); |
ib.ptr[ib.length_dw++] = dummy; |
ib.ptr[ib.length_dw++] = 0x00000001; |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000014); /* len */ |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x05000005); /* feedback buffer */ |
ib.ptr[ib.length_dw++] = cpu_to_le32(upper_32_bits(dummy)); |
ib.ptr[ib.length_dw++] = cpu_to_le32(dummy); |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001); |
|
for (i = ib.length_dw; i < ib_size_dw; ++i) |
ib.ptr[i] = 0x0; |
ib.ptr[i] = cpu_to_le32(0x0); |
|
r = radeon_ib_schedule(rdev, &ib, NULL, false); |
if (r) { |
@@ -410,21 +428,21 @@
|
/* stitch together an VCE destroy msg */ |
ib.length_dw = 0; |
ib.ptr[ib.length_dw++] = 0x0000000c; /* len */ |
ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */ |
ib.ptr[ib.length_dw++] = handle; |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c); /* len */ |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001); /* session cmd */ |
ib.ptr[ib.length_dw++] = cpu_to_le32(handle); |
|
ib.ptr[ib.length_dw++] = 0x00000014; /* len */ |
ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */ |
ib.ptr[ib.length_dw++] = upper_32_bits(dummy); |
ib.ptr[ib.length_dw++] = dummy; |
ib.ptr[ib.length_dw++] = 0x00000001; |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000014); /* len */ |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x05000005); /* feedback buffer */ |
ib.ptr[ib.length_dw++] = cpu_to_le32(upper_32_bits(dummy)); |
ib.ptr[ib.length_dw++] = cpu_to_le32(dummy); |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001); |
|
ib.ptr[ib.length_dw++] = 0x00000008; /* len */ |
ib.ptr[ib.length_dw++] = 0x02000001; /* destroy cmd */ |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000008); /* len */ |
ib.ptr[ib.length_dw++] = cpu_to_le32(0x02000001); /* destroy cmd */ |
|
for (i = ib.length_dw; i < ib_size_dw; ++i) |
ib.ptr[i] = 0x0; |
ib.ptr[i] = cpu_to_le32(0x0); |
|
r = radeon_ib_schedule(rdev, &ib, NULL, false); |
if (r) { |
@@ -493,19 +511,28 @@
* |
* @p: parser context |
* @handle: handle to validate |
* @allocated: allocated a new handle? |
* |
* Validates the handle and return the found session index or -EINVAL |
* we we don't have another free session index. |
*/ |
int radeon_vce_validate_handle(struct radeon_cs_parser *p, uint32_t handle) |
static int radeon_vce_validate_handle(struct radeon_cs_parser *p, |
uint32_t handle, bool *allocated) |
{ |
unsigned i; |
|
*allocated = false; |
|
/* validate the handle */ |
for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { |
if (atomic_read(&p->rdev->vce.handles[i]) == handle) |
if (atomic_read(&p->rdev->vce.handles[i]) == handle) { |
if (p->rdev->vce.filp[i] != p->filp) { |
DRM_ERROR("VCE handle collision detected!\n"); |
return -EINVAL; |
} |
return i; |
} |
} |
|
/* handle not found try to alloc a new one */ |
for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { |
@@ -512,6 +539,7 @@
if (!atomic_cmpxchg(&p->rdev->vce.handles[i], 0, handle)) { |
p->rdev->vce.filp[i] = p->filp; |
p->rdev->vce.img_size[i] = 0; |
*allocated = true; |
return i; |
} |
} |
@@ -529,10 +557,10 @@
int radeon_vce_cs_parse(struct radeon_cs_parser *p) |
{ |
int session_idx = -1; |
bool destroyed = false; |
bool destroyed = false, created = false, allocated = false; |
uint32_t tmp, handle = 0; |
uint32_t *size = &tmp; |
int i, r; |
int i, r = 0; |
|
while (p->idx < p->chunk_ib->length_dw) { |
uint32_t len = radeon_get_ib_value(p, p->idx); |
@@ -540,18 +568,21 @@
|
if ((len < 8) || (len & 3)) { |
DRM_ERROR("invalid VCE command length (%d)!\n", len); |
return -EINVAL; |
r = -EINVAL; |
goto out; |
} |
|
if (destroyed) { |
DRM_ERROR("No other command allowed after destroy!\n"); |
return -EINVAL; |
r = -EINVAL; |
goto out; |
} |
|
switch (cmd) { |
case 0x00000001: // session |
handle = radeon_get_ib_value(p, p->idx + 2); |
session_idx = radeon_vce_validate_handle(p, handle); |
session_idx = radeon_vce_validate_handle(p, handle, |
&allocated); |
if (session_idx < 0) |
return session_idx; |
size = &p->rdev->vce.img_size[session_idx]; |
@@ -561,6 +592,13 @@
break; |
|
case 0x01000001: // create |
created = true; |
if (!allocated) { |
DRM_ERROR("Handle already in use!\n"); |
r = -EINVAL; |
goto out; |
} |
|
*size = radeon_get_ib_value(p, p->idx + 8) * |
radeon_get_ib_value(p, p->idx + 10) * |
8 * 3 / 2; |
@@ -571,6 +609,7 @@
case 0x04000005: // rate control |
case 0x04000007: // motion estimation |
case 0x04000008: // rdo |
case 0x04000009: // vui |
break; |
|
case 0x03000001: // encode |
@@ -577,12 +616,12 @@
r = radeon_vce_cs_reloc(p, p->idx + 10, p->idx + 9, |
*size); |
if (r) |
return r; |
goto out; |
|
r = radeon_vce_cs_reloc(p, p->idx + 12, p->idx + 11, |
*size / 3); |
if (r) |
return r; |
goto out; |
break; |
|
case 0x02000001: // destroy |
@@ -593,7 +632,7 @@
r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2, |
*size * 2); |
if (r) |
return r; |
goto out; |
break; |
|
case 0x05000004: // video bitstream buffer |
@@ -601,7 +640,7 @@
r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2, |
tmp); |
if (r) |
return r; |
goto out; |
break; |
|
case 0x05000005: // feedback buffer |
@@ -608,29 +647,40 @@
r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2, |
4096); |
if (r) |
return r; |
goto out; |
break; |
|
default: |
DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); |
return -EINVAL; |
r = -EINVAL; |
goto out; |
} |
|
if (session_idx == -1) { |
DRM_ERROR("no session command at start of IB\n"); |
return -EINVAL; |
r = -EINVAL; |
goto out; |
} |
|
p->idx += len / 4; |
} |
|
if (destroyed) { |
/* IB contains a destroy msg, free the handle */ |
if (allocated && !created) { |
DRM_ERROR("New session without create command!\n"); |
r = -ENOENT; |
} |
|
out: |
if ((!r && destroyed) || (r && allocated)) { |
/* |
* IB contains a destroy msg or we have allocated an |
* handle and got an error, anyway free the handle |
*/ |
for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) |
atomic_cmpxchg(&p->rdev->vce.handles[i], handle, 0); |
} |
|
return 0; |
return r; |
} |
|
/** |
@@ -649,12 +699,12 @@
{ |
uint64_t addr = semaphore->gpu_addr; |
|
radeon_ring_write(ring, VCE_CMD_SEMAPHORE); |
radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); |
radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); |
radeon_ring_write(ring, 0x01003000 | (emit_wait ? 1 : 0)); |
radeon_ring_write(ring, cpu_to_le32(VCE_CMD_SEMAPHORE)); |
radeon_ring_write(ring, cpu_to_le32((addr >> 3) & 0x000FFFFF)); |
radeon_ring_write(ring, cpu_to_le32((addr >> 23) & 0x000FFFFF)); |
radeon_ring_write(ring, cpu_to_le32(0x01003000 | (emit_wait ? 1 : 0))); |
if (!emit_wait) |
radeon_ring_write(ring, VCE_CMD_END); |
radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END)); |
|
return true; |
} |
@@ -669,10 +719,10 @@
void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) |
{ |
struct radeon_ring *ring = &rdev->ring[ib->ring]; |
radeon_ring_write(ring, VCE_CMD_IB); |
radeon_ring_write(ring, ib->gpu_addr); |
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr)); |
radeon_ring_write(ring, ib->length_dw); |
radeon_ring_write(ring, cpu_to_le32(VCE_CMD_IB)); |
radeon_ring_write(ring, cpu_to_le32(ib->gpu_addr)); |
radeon_ring_write(ring, cpu_to_le32(upper_32_bits(ib->gpu_addr))); |
radeon_ring_write(ring, cpu_to_le32(ib->length_dw)); |
} |
|
/** |
@@ -688,12 +738,12 @@
struct radeon_ring *ring = &rdev->ring[fence->ring]; |
uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr; |
|
radeon_ring_write(ring, VCE_CMD_FENCE); |
radeon_ring_write(ring, addr); |
radeon_ring_write(ring, upper_32_bits(addr)); |
radeon_ring_write(ring, fence->seq); |
radeon_ring_write(ring, VCE_CMD_TRAP); |
radeon_ring_write(ring, VCE_CMD_END); |
radeon_ring_write(ring, cpu_to_le32(VCE_CMD_FENCE)); |
radeon_ring_write(ring, cpu_to_le32(addr)); |
radeon_ring_write(ring, cpu_to_le32(upper_32_bits(addr))); |
radeon_ring_write(ring, cpu_to_le32(fence->seq)); |
radeon_ring_write(ring, cpu_to_le32(VCE_CMD_TRAP)); |
radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END)); |
} |
|
/** |
@@ -715,7 +765,7 @@
ring->idx, r); |
return r; |
} |
radeon_ring_write(ring, VCE_CMD_END); |
radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END)); |
radeon_ring_unlock_commit(rdev, ring, false); |
|
for (i = 0; i < rdev->usec_timeout; i++) { |