0,0 → 1,506 |
// |
// Copyright 2012 Francisco Jerez |
// |
// Permission is hereby granted, free of charge, to any person obtaining a |
// copy of this software and associated documentation files (the "Software"), |
// to deal in the Software without restriction, including without limitation |
// the rights to use, copy, modify, merge, publish, distribute, sublicense, |
// and/or sell copies of the Software, and to permit persons to whom the |
// Software is furnished to do so, subject to the following conditions: |
// |
// The above copyright notice and this permission notice shall be included in |
// all copies or substantial portions of the Software. |
// |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
// OTHER DEALINGS IN THE SOFTWARE. |
// |
|
#include <cstring> |
|
#include "api/util.hpp" |
#include "core/event.hpp" |
#include "core/resource.hpp" |
|
using namespace clover; |
|
namespace { |
typedef resource::point point; |
|
/// |
/// Common argument checking shared by memory transfer commands. |
/// |
void |
validate_base(cl_command_queue q, cl_uint num_deps, const cl_event *deps) { |
if (!q) |
throw error(CL_INVALID_COMMAND_QUEUE); |
|
if (bool(num_deps) != bool(deps) || |
any_of(is_zero<cl_event>, deps, deps + num_deps)) |
throw error(CL_INVALID_EVENT_WAIT_LIST); |
|
if (any_of([&](const cl_event ev) { |
return &ev->ctx != &q->ctx; |
}, deps, deps + num_deps)) |
throw error(CL_INVALID_CONTEXT); |
} |
|
/// |
/// Memory object-specific argument checking shared by most memory |
/// transfer commands. |
/// |
void |
validate_obj(cl_command_queue q, cl_mem obj) { |
if (!obj) |
throw error(CL_INVALID_MEM_OBJECT); |
|
if (&obj->ctx != &q->ctx) |
throw error(CL_INVALID_CONTEXT); |
} |
|
/// |
/// Class that encapsulates the task of mapping an object of type |
/// \a T. The return value of get() should be implicitly |
/// convertible to \a void *. |
/// |
template<typename T> struct __map; |
|
template<> struct __map<void *> { |
static void * |
get(cl_command_queue q, void *obj, cl_map_flags flags, |
size_t offset, size_t size) { |
return (char *)obj + offset; |
} |
}; |
|
template<> struct __map<const void *> { |
static const void * |
get(cl_command_queue q, const void *obj, cl_map_flags flags, |
size_t offset, size_t size) { |
return (const char *)obj + offset; |
} |
}; |
|
template<> struct __map<memory_obj *> { |
static mapping |
get(cl_command_queue q, memory_obj *obj, cl_map_flags flags, |
size_t offset, size_t size) { |
return { *q, obj->resource(q), flags, true, { offset }, { size, 1, 1 }}; |
} |
}; |
|
/// |
/// Software copy from \a src_obj to \a dst_obj. They can be |
/// either pointers or memory objects. |
/// |
template<typename T, typename S> |
std::function<void (event &)> |
soft_copy_op(cl_command_queue q, |
T dst_obj, const point &dst_orig, const point &dst_pitch, |
S src_obj, const point &src_orig, const point &src_pitch, |
const point ®ion) { |
return [=](event &) { |
auto dst = __map<T>::get(q, dst_obj, CL_MAP_WRITE, |
dst_pitch(dst_orig), dst_pitch(region)); |
auto src = __map<S>::get(q, src_obj, CL_MAP_READ, |
src_pitch(src_orig), src_pitch(region)); |
point p; |
|
for (p[2] = 0; p[2] < region[2]; ++p[2]) { |
for (p[1] = 0; p[1] < region[1]; ++p[1]) { |
std::memcpy(static_cast<char *>(dst) + dst_pitch(p), |
static_cast<const char *>(src) + src_pitch(p), |
src_pitch[0] * region[0]); |
} |
} |
}; |
} |
|
/// |
/// Hardware copy from \a src_obj to \a dst_obj. |
/// |
template<typename T, typename S> |
std::function<void (event &)> |
hard_copy_op(cl_command_queue q, T dst_obj, const point &dst_orig, |
S src_obj, const point &src_orig, const point ®ion) { |
return [=](event &) { |
dst_obj->resource(q).copy(*q, dst_orig, region, |
src_obj->resource(q), src_orig); |
}; |
} |
} |
|
PUBLIC cl_int |
clEnqueueReadBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, |
size_t offset, size_t size, void *ptr, |
cl_uint num_deps, const cl_event *deps, |
cl_event *ev) try { |
validate_base(q, num_deps, deps); |
validate_obj(q, obj); |
|
if (!ptr || offset > obj->size() || offset + size > obj->size()) |
throw error(CL_INVALID_VALUE); |
|
hard_event *hev = new hard_event( |
*q, CL_COMMAND_READ_BUFFER, { deps, deps + num_deps }, |
soft_copy_op(q, |
ptr, { 0 }, { 1 }, |
obj, { offset }, { 1 }, |
{ size, 1, 1 })); |
|
ret_object(ev, hev); |
return CL_SUCCESS; |
|
} catch (error &e) { |
return e.get(); |
} |
|
PUBLIC cl_int |
clEnqueueWriteBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, |
size_t offset, size_t size, const void *ptr, |
cl_uint num_deps, const cl_event *deps, |
cl_event *ev) try { |
validate_base(q, num_deps, deps); |
validate_obj(q, obj); |
|
if (!ptr || offset > obj->size() || offset + size > obj->size()) |
throw error(CL_INVALID_VALUE); |
|
hard_event *hev = new hard_event( |
*q, CL_COMMAND_WRITE_BUFFER, { deps, deps + num_deps }, |
soft_copy_op(q, |
obj, { offset }, { 1 }, |
ptr, { 0 }, { 1 }, |
{ size, 1, 1 })); |
|
ret_object(ev, hev); |
return CL_SUCCESS; |
|
} catch (error &e) { |
return e.get(); |
} |
|
PUBLIC cl_int |
clEnqueueReadBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking, |
const size_t *obj_origin, const size_t *host_origin, |
const size_t *region, |
size_t obj_row_pitch, size_t obj_slice_pitch, |
size_t host_row_pitch, size_t host_slice_pitch, |
void *ptr, |
cl_uint num_deps, const cl_event *deps, |
cl_event *ev) try { |
validate_base(q, num_deps, deps); |
validate_obj(q, obj); |
|
if (!ptr) |
throw error(CL_INVALID_VALUE); |
|
hard_event *hev = new hard_event( |
*q, CL_COMMAND_READ_BUFFER_RECT, { deps, deps + num_deps }, |
soft_copy_op(q, |
ptr, host_origin, |
{ 1, host_row_pitch, host_slice_pitch }, |
obj, obj_origin, |
{ 1, obj_row_pitch, obj_slice_pitch }, |
region)); |
|
ret_object(ev, hev); |
return CL_SUCCESS; |
|
} catch (error &e) { |
return e.get(); |
} |
|
PUBLIC cl_int |
clEnqueueWriteBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking, |
const size_t *obj_origin, const size_t *host_origin, |
const size_t *region, |
size_t obj_row_pitch, size_t obj_slice_pitch, |
size_t host_row_pitch, size_t host_slice_pitch, |
const void *ptr, |
cl_uint num_deps, const cl_event *deps, |
cl_event *ev) try { |
validate_base(q, num_deps, deps); |
validate_obj(q, obj); |
|
if (!ptr) |
throw error(CL_INVALID_VALUE); |
|
hard_event *hev = new hard_event( |
*q, CL_COMMAND_WRITE_BUFFER_RECT, { deps, deps + num_deps }, |
soft_copy_op(q, |
obj, obj_origin, |
{ 1, obj_row_pitch, obj_slice_pitch }, |
ptr, host_origin, |
{ 1, host_row_pitch, host_slice_pitch }, |
region)); |
|
ret_object(ev, hev); |
return CL_SUCCESS; |
|
} catch (error &e) { |
return e.get(); |
} |
|
PUBLIC cl_int |
clEnqueueCopyBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, |
size_t src_offset, size_t dst_offset, size_t size, |
cl_uint num_deps, const cl_event *deps, |
cl_event *ev) try { |
validate_base(q, num_deps, deps); |
validate_obj(q, src_obj); |
validate_obj(q, dst_obj); |
|
hard_event *hev = new hard_event( |
*q, CL_COMMAND_COPY_BUFFER, { deps, deps + num_deps }, |
hard_copy_op(q, dst_obj, { dst_offset }, |
src_obj, { src_offset }, |
{ size, 1, 1 })); |
|
ret_object(ev, hev); |
return CL_SUCCESS; |
|
} catch (error &e) { |
return e.get(); |
} |
|
PUBLIC cl_int |
clEnqueueCopyBufferRect(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, |
const size_t *src_origin, const size_t *dst_origin, |
const size_t *region, |
size_t src_row_pitch, size_t src_slice_pitch, |
size_t dst_row_pitch, size_t dst_slice_pitch, |
cl_uint num_deps, const cl_event *deps, |
cl_event *ev) try { |
validate_base(q, num_deps, deps); |
validate_obj(q, src_obj); |
validate_obj(q, dst_obj); |
|
hard_event *hev = new hard_event( |
*q, CL_COMMAND_COPY_BUFFER_RECT, { deps, deps + num_deps }, |
soft_copy_op(q, |
dst_obj, dst_origin, |
{ 1, dst_row_pitch, dst_slice_pitch }, |
src_obj, src_origin, |
{ 1, src_row_pitch, src_slice_pitch }, |
region)); |
|
ret_object(ev, hev); |
return CL_SUCCESS; |
|
} catch (error &e) { |
return e.get(); |
} |
|
PUBLIC cl_int |
clEnqueueReadImage(cl_command_queue q, cl_mem obj, cl_bool blocking, |
const size_t *origin, const size_t *region, |
size_t row_pitch, size_t slice_pitch, void *ptr, |
cl_uint num_deps, const cl_event *deps, |
cl_event *ev) try { |
image *img = dynamic_cast<image *>(obj); |
|
validate_base(q, num_deps, deps); |
validate_obj(q, img); |
|
if (!ptr) |
throw error(CL_INVALID_VALUE); |
|
hard_event *hev = new hard_event( |
*q, CL_COMMAND_READ_IMAGE, { deps, deps + num_deps }, |
soft_copy_op(q, |
ptr, {}, |
{ 1, row_pitch, slice_pitch }, |
obj, origin, |
{ 1, img->row_pitch(), img->slice_pitch() }, |
region)); |
|
ret_object(ev, hev); |
return CL_SUCCESS; |
|
} catch (error &e) { |
return e.get(); |
} |
|
PUBLIC cl_int |
clEnqueueWriteImage(cl_command_queue q, cl_mem obj, cl_bool blocking, |
const size_t *origin, const size_t *region, |
size_t row_pitch, size_t slice_pitch, const void *ptr, |
cl_uint num_deps, const cl_event *deps, |
cl_event *ev) try { |
image *img = dynamic_cast<image *>(obj); |
|
validate_base(q, num_deps, deps); |
validate_obj(q, img); |
|
if (!ptr) |
throw error(CL_INVALID_VALUE); |
|
hard_event *hev = new hard_event( |
*q, CL_COMMAND_WRITE_IMAGE, { deps, deps + num_deps }, |
soft_copy_op(q, |
obj, origin, |
{ 1, img->row_pitch(), img->slice_pitch() }, |
ptr, {}, |
{ 1, row_pitch, slice_pitch }, |
region)); |
|
ret_object(ev, hev); |
return CL_SUCCESS; |
|
} catch (error &e) { |
return e.get(); |
} |
|
PUBLIC cl_int |
clEnqueueCopyImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, |
const size_t *src_origin, const size_t *dst_origin, |
const size_t *region, |
cl_uint num_deps, const cl_event *deps, |
cl_event *ev) try { |
image *src_img = dynamic_cast<image *>(src_obj); |
image *dst_img = dynamic_cast<image *>(dst_obj); |
|
validate_base(q, num_deps, deps); |
validate_obj(q, src_img); |
validate_obj(q, dst_img); |
|
hard_event *hev = new hard_event( |
*q, CL_COMMAND_COPY_IMAGE, { deps, deps + num_deps }, |
hard_copy_op(q, dst_obj, dst_origin, src_obj, src_origin, region)); |
|
ret_object(ev, hev); |
return CL_SUCCESS; |
|
} catch (error &e) { |
return e.get(); |
} |
|
PUBLIC cl_int |
clEnqueueCopyImageToBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, |
const size_t *src_origin, const size_t *region, |
size_t dst_offset, |
cl_uint num_deps, const cl_event *deps, |
cl_event *ev) try { |
image *src_img = dynamic_cast<image *>(src_obj); |
|
validate_base(q, num_deps, deps); |
validate_obj(q, src_img); |
validate_obj(q, dst_obj); |
|
hard_event *hev = new hard_event( |
*q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, { deps, deps + num_deps }, |
soft_copy_op(q, |
dst_obj, { dst_offset }, |
{ 0, 0, 0 }, |
src_obj, src_origin, |
{ 1, src_img->row_pitch(), src_img->slice_pitch() }, |
region)); |
|
ret_object(ev, hev); |
return CL_SUCCESS; |
|
} catch (error &e) { |
return e.get(); |
} |
|
PUBLIC cl_int |
clEnqueueCopyBufferToImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, |
size_t src_offset, |
const size_t *dst_origin, const size_t *region, |
cl_uint num_deps, const cl_event *deps, |
cl_event *ev) try { |
image *dst_img = dynamic_cast<image *>(dst_obj); |
|
validate_base(q, num_deps, deps); |
validate_obj(q, src_obj); |
validate_obj(q, dst_img); |
|
hard_event *hev = new hard_event( |
*q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, { deps, deps + num_deps }, |
soft_copy_op(q, |
dst_obj, dst_origin, |
{ 1, dst_img->row_pitch(), dst_img->slice_pitch() }, |
src_obj, { src_offset }, |
{ 0, 0, 0 }, |
region)); |
|
ret_object(ev, hev); |
return CL_SUCCESS; |
|
} catch (error &e) { |
return e.get(); |
} |
|
PUBLIC void * |
clEnqueueMapBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, |
cl_map_flags flags, size_t offset, size_t size, |
cl_uint num_deps, const cl_event *deps, |
cl_event *ev, cl_int *errcode_ret) try { |
validate_base(q, num_deps, deps); |
validate_obj(q, obj); |
|
if (offset > obj->size() || offset + size > obj->size()) |
throw error(CL_INVALID_VALUE); |
|
void *map = obj->resource(q).add_map( |
*q, flags, blocking, { offset }, { size }); |
|
ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_BUFFER, |
{ deps, deps + num_deps })); |
ret_error(errcode_ret, CL_SUCCESS); |
return map; |
|
} catch (error &e) { |
ret_error(errcode_ret, e); |
return NULL; |
} |
|
PUBLIC void * |
clEnqueueMapImage(cl_command_queue q, cl_mem obj, cl_bool blocking, |
cl_map_flags flags, |
const size_t *origin, const size_t *region, |
size_t *row_pitch, size_t *slice_pitch, |
cl_uint num_deps, const cl_event *deps, |
cl_event *ev, cl_int *errcode_ret) try { |
image *img = dynamic_cast<image *>(obj); |
|
validate_base(q, num_deps, deps); |
validate_obj(q, img); |
|
void *map = obj->resource(q).add_map( |
*q, flags, blocking, origin, region); |
|
ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_IMAGE, |
{ deps, deps + num_deps })); |
ret_error(errcode_ret, CL_SUCCESS); |
return map; |
|
} catch (error &e) { |
ret_error(errcode_ret, e); |
return NULL; |
} |
|
PUBLIC cl_int |
clEnqueueUnmapMemObject(cl_command_queue q, cl_mem obj, void *ptr, |
cl_uint num_deps, const cl_event *deps, |
cl_event *ev) try { |
validate_base(q, num_deps, deps); |
validate_obj(q, obj); |
|
hard_event *hev = new hard_event( |
*q, CL_COMMAND_UNMAP_MEM_OBJECT, { deps, deps + num_deps }, |
[=](event &) { |
obj->resource(q).del_map(ptr); |
}); |
|
ret_object(ev, hev); |
return CL_SUCCESS; |
|
} catch (error &e) { |
return e.get(); |
} |