Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. //
  2. // Copyright 2012 Francisco Jerez
  3. //
  4. // Permission is hereby granted, free of charge, to any person obtaining a
  5. // copy of this software and associated documentation files (the "Software"),
  6. // to deal in the Software without restriction, including without limitation
  7. // the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. // and/or sell copies of the Software, and to permit persons to whom the
  9. // Software is furnished to do so, subject to the following conditions:
  10. //
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17. // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18. // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19. // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20. // OTHER DEALINGS IN THE SOFTWARE.
  21. //
  22.  
  23. #include <cstring>
  24.  
  25. #include "api/util.hpp"
  26. #include "core/event.hpp"
  27. #include "core/resource.hpp"
  28.  
  29. using namespace clover;
  30.  
  31. namespace {
  32.    typedef resource::point point;
  33.  
  34.    ///
  35.    /// Common argument checking shared by memory transfer commands.
  36.    ///
  37.    void
  38.    validate_base(cl_command_queue q, cl_uint num_deps, const cl_event *deps) {
  39.       if (!q)
  40.          throw error(CL_INVALID_COMMAND_QUEUE);
  41.  
  42.       if (bool(num_deps) != bool(deps) ||
  43.           any_of(is_zero<cl_event>, deps, deps + num_deps))
  44.          throw error(CL_INVALID_EVENT_WAIT_LIST);
  45.  
  46.       if (any_of([&](const cl_event ev) {
  47.                return &ev->ctx != &q->ctx;
  48.             }, deps, deps + num_deps))
  49.          throw error(CL_INVALID_CONTEXT);
  50.    }
  51.  
  52.    ///
  53.    /// Memory object-specific argument checking shared by most memory
  54.    /// transfer commands.
  55.    ///
  56.    void
  57.    validate_obj(cl_command_queue q, cl_mem obj) {
  58.       if (!obj)
  59.          throw error(CL_INVALID_MEM_OBJECT);
  60.  
  61.       if (&obj->ctx != &q->ctx)
  62.          throw error(CL_INVALID_CONTEXT);
  63.    }
  64.  
  65.    ///
  66.    /// Class that encapsulates the task of mapping an object of type
  67.    /// \a T.  The return value of get() should be implicitly
  68.    /// convertible to \a void *.
  69.    ///
  70.    template<typename T> struct __map;
  71.  
  72.    template<> struct __map<void *> {
  73.       static void *
  74.       get(cl_command_queue q, void *obj, cl_map_flags flags,
  75.           size_t offset, size_t size) {
  76.          return (char *)obj + offset;
  77.       }
  78.    };
  79.  
  80.    template<> struct __map<const void *> {
  81.       static const void *
  82.       get(cl_command_queue q, const void *obj, cl_map_flags flags,
  83.           size_t offset, size_t size) {
  84.          return (const char *)obj + offset;
  85.       }
  86.    };
  87.  
  88.    template<> struct __map<memory_obj *> {
  89.       static mapping
  90.       get(cl_command_queue q, memory_obj *obj, cl_map_flags flags,
  91.           size_t offset, size_t size) {
  92.          return { *q, obj->resource(q), flags, true, { offset }, { size, 1, 1 }};
  93.       }
  94.    };
  95.  
  96.    ///
  97.    /// Software copy from \a src_obj to \a dst_obj.  They can be
  98.    /// either pointers or memory objects.
  99.    ///
  100.    template<typename T, typename S>
  101.    std::function<void (event &)>
  102.    soft_copy_op(cl_command_queue q,
  103.                 T dst_obj, const point &dst_orig, const point &dst_pitch,
  104.                 S src_obj, const point &src_orig, const point &src_pitch,
  105.                 const point &region) {
  106.       return [=](event &) {
  107.          auto dst = __map<T>::get(q, dst_obj, CL_MAP_WRITE,
  108.                                   dst_pitch(dst_orig), dst_pitch(region));
  109.          auto src = __map<S>::get(q, src_obj, CL_MAP_READ,
  110.                                   src_pitch(src_orig), src_pitch(region));
  111.          point p;
  112.  
  113.          for (p[2] = 0; p[2] < region[2]; ++p[2]) {
  114.             for (p[1] = 0; p[1] < region[1]; ++p[1]) {
  115.                std::memcpy(static_cast<char *>(dst) + dst_pitch(p),
  116.                            static_cast<const char *>(src) + src_pitch(p),
  117.                            src_pitch[0] * region[0]);
  118.             }
  119.          }
  120.       };
  121.    }
  122.  
  123.    ///
  124.    /// Hardware copy from \a src_obj to \a dst_obj.
  125.    ///
  126.    template<typename T, typename S>
  127.    std::function<void (event &)>
  128.    hard_copy_op(cl_command_queue q, T dst_obj, const point &dst_orig,
  129.                 S src_obj, const point &src_orig, const point &region) {
  130.       return [=](event &) {
  131.          dst_obj->resource(q).copy(*q, dst_orig, region,
  132.                                    src_obj->resource(q), src_orig);
  133.       };
  134.    }
  135. }
  136.  
  137. PUBLIC cl_int
  138. clEnqueueReadBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
  139.                     size_t offset, size_t size, void *ptr,
  140.                     cl_uint num_deps, const cl_event *deps,
  141.                     cl_event *ev) try {
  142.    validate_base(q, num_deps, deps);
  143.    validate_obj(q, obj);
  144.  
  145.    if (!ptr || offset > obj->size() || offset + size > obj->size())
  146.       throw error(CL_INVALID_VALUE);
  147.  
  148.    hard_event *hev = new hard_event(
  149.       *q, CL_COMMAND_READ_BUFFER, { deps, deps + num_deps },
  150.       soft_copy_op(q,
  151.                    ptr, { 0 }, { 1 },
  152.                    obj, { offset }, { 1 },
  153.                    { size, 1, 1 }));
  154.  
  155.    ret_object(ev, hev);
  156.    return CL_SUCCESS;
  157.  
  158. } catch (error &e) {
  159.    return e.get();
  160. }
  161.  
  162. PUBLIC cl_int
  163. clEnqueueWriteBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
  164.                      size_t offset, size_t size, const void *ptr,
  165.                      cl_uint num_deps, const cl_event *deps,
  166.                      cl_event *ev) try {
  167.    validate_base(q, num_deps, deps);
  168.    validate_obj(q, obj);
  169.  
  170.    if (!ptr || offset > obj->size() || offset + size > obj->size())
  171.       throw error(CL_INVALID_VALUE);
  172.  
  173.    hard_event *hev = new hard_event(
  174.       *q, CL_COMMAND_WRITE_BUFFER, { deps, deps + num_deps },
  175.       soft_copy_op(q,
  176.                    obj, { offset }, { 1 },
  177.                    ptr, { 0 }, { 1 },
  178.                    { size, 1, 1 }));
  179.  
  180.    ret_object(ev, hev);
  181.    return CL_SUCCESS;
  182.  
  183. } catch (error &e) {
  184.    return e.get();
  185. }
  186.  
  187. PUBLIC cl_int
  188. clEnqueueReadBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking,
  189.                         const size_t *obj_origin, const size_t *host_origin,
  190.                         const size_t *region,
  191.                         size_t obj_row_pitch, size_t obj_slice_pitch,
  192.                         size_t host_row_pitch, size_t host_slice_pitch,
  193.                         void *ptr,
  194.                         cl_uint num_deps, const cl_event *deps,
  195.                         cl_event *ev) try {
  196.    validate_base(q, num_deps, deps);
  197.    validate_obj(q, obj);
  198.  
  199.    if (!ptr)
  200.       throw error(CL_INVALID_VALUE);
  201.  
  202.    hard_event *hev = new hard_event(
  203.       *q, CL_COMMAND_READ_BUFFER_RECT, { deps, deps + num_deps },
  204.       soft_copy_op(q,
  205.                    ptr, host_origin,
  206.                    { 1, host_row_pitch, host_slice_pitch },
  207.                    obj, obj_origin,
  208.                    { 1, obj_row_pitch, obj_slice_pitch },
  209.                    region));
  210.  
  211.    ret_object(ev, hev);
  212.    return CL_SUCCESS;
  213.  
  214. } catch (error &e) {
  215.    return e.get();
  216. }
  217.  
  218. PUBLIC cl_int
  219. clEnqueueWriteBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking,
  220.                          const size_t *obj_origin, const size_t *host_origin,
  221.                          const size_t *region,
  222.                          size_t obj_row_pitch, size_t obj_slice_pitch,
  223.                          size_t host_row_pitch, size_t host_slice_pitch,
  224.                          const void *ptr,
  225.                          cl_uint num_deps, const cl_event *deps,
  226.                          cl_event *ev) try {
  227.    validate_base(q, num_deps, deps);
  228.    validate_obj(q, obj);
  229.  
  230.    if (!ptr)
  231.       throw error(CL_INVALID_VALUE);
  232.  
  233.    hard_event *hev = new hard_event(
  234.       *q, CL_COMMAND_WRITE_BUFFER_RECT, { deps, deps + num_deps },
  235.       soft_copy_op(q,
  236.                    obj, obj_origin,
  237.                    { 1, obj_row_pitch, obj_slice_pitch },
  238.                    ptr, host_origin,
  239.                    { 1, host_row_pitch, host_slice_pitch },
  240.                    region));
  241.  
  242.    ret_object(ev, hev);
  243.    return CL_SUCCESS;
  244.  
  245. } catch (error &e) {
  246.    return e.get();
  247. }
  248.  
  249. PUBLIC cl_int
  250. clEnqueueCopyBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
  251.                     size_t src_offset, size_t dst_offset, size_t size,
  252.                     cl_uint num_deps, const cl_event *deps,
  253.                     cl_event *ev) try {
  254.    validate_base(q, num_deps, deps);
  255.    validate_obj(q, src_obj);
  256.    validate_obj(q, dst_obj);
  257.  
  258.    hard_event *hev = new hard_event(
  259.       *q, CL_COMMAND_COPY_BUFFER, { deps, deps + num_deps },
  260.       hard_copy_op(q, dst_obj, { dst_offset },
  261.                    src_obj, { src_offset },
  262.                    { size, 1, 1 }));
  263.  
  264.    ret_object(ev, hev);
  265.    return CL_SUCCESS;
  266.  
  267. } catch (error &e) {
  268.    return e.get();
  269. }
  270.  
  271. PUBLIC cl_int
  272. clEnqueueCopyBufferRect(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
  273.                         const size_t *src_origin, const size_t *dst_origin,
  274.                         const size_t *region,
  275.                         size_t src_row_pitch, size_t src_slice_pitch,
  276.                         size_t dst_row_pitch, size_t dst_slice_pitch,
  277.                         cl_uint num_deps, const cl_event *deps,
  278.                         cl_event *ev) try {
  279.    validate_base(q, num_deps, deps);
  280.    validate_obj(q, src_obj);
  281.    validate_obj(q, dst_obj);
  282.  
  283.    hard_event *hev = new hard_event(
  284.       *q, CL_COMMAND_COPY_BUFFER_RECT, { deps, deps + num_deps },
  285.       soft_copy_op(q,
  286.                    dst_obj, dst_origin,
  287.                    { 1, dst_row_pitch, dst_slice_pitch },
  288.                    src_obj, src_origin,
  289.                    { 1, src_row_pitch, src_slice_pitch },
  290.                    region));
  291.  
  292.    ret_object(ev, hev);
  293.    return CL_SUCCESS;
  294.  
  295. } catch (error &e) {
  296.    return e.get();
  297. }
  298.  
  299. PUBLIC cl_int
  300. clEnqueueReadImage(cl_command_queue q, cl_mem obj, cl_bool blocking,
  301.                    const size_t *origin, const size_t *region,
  302.                    size_t row_pitch, size_t slice_pitch, void *ptr,
  303.                    cl_uint num_deps, const cl_event *deps,
  304.                    cl_event *ev) try {
  305.    image *img = dynamic_cast<image *>(obj);
  306.  
  307.    validate_base(q, num_deps, deps);
  308.    validate_obj(q, img);
  309.  
  310.    if (!ptr)
  311.       throw error(CL_INVALID_VALUE);
  312.  
  313.    hard_event *hev = new hard_event(
  314.       *q, CL_COMMAND_READ_IMAGE, { deps, deps + num_deps },
  315.       soft_copy_op(q,
  316.                    ptr, {},
  317.                    { 1, row_pitch, slice_pitch },
  318.                    obj, origin,
  319.                    { 1, img->row_pitch(), img->slice_pitch() },
  320.                    region));
  321.  
  322.    ret_object(ev, hev);
  323.    return CL_SUCCESS;
  324.  
  325. } catch (error &e) {
  326.    return e.get();
  327. }
  328.  
  329. PUBLIC cl_int
  330. clEnqueueWriteImage(cl_command_queue q, cl_mem obj, cl_bool blocking,
  331.                     const size_t *origin, const size_t *region,
  332.                     size_t row_pitch, size_t slice_pitch, const void *ptr,
  333.                     cl_uint num_deps, const cl_event *deps,
  334.                     cl_event *ev) try {
  335.    image *img = dynamic_cast<image *>(obj);
  336.  
  337.    validate_base(q, num_deps, deps);
  338.    validate_obj(q, img);
  339.  
  340.    if (!ptr)
  341.       throw error(CL_INVALID_VALUE);
  342.  
  343.    hard_event *hev = new hard_event(
  344.       *q, CL_COMMAND_WRITE_IMAGE, { deps, deps + num_deps },
  345.       soft_copy_op(q,
  346.                    obj, origin,
  347.                    { 1, img->row_pitch(), img->slice_pitch() },
  348.                    ptr, {},
  349.                    { 1, row_pitch, slice_pitch },
  350.                    region));
  351.  
  352.    ret_object(ev, hev);
  353.    return CL_SUCCESS;
  354.  
  355. } catch (error &e) {
  356.    return e.get();
  357. }
  358.  
  359. PUBLIC cl_int
  360. clEnqueueCopyImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
  361.                    const size_t *src_origin, const size_t *dst_origin,
  362.                    const size_t *region,
  363.                    cl_uint num_deps, const cl_event *deps,
  364.                    cl_event *ev) try {
  365.    image *src_img = dynamic_cast<image *>(src_obj);
  366.    image *dst_img = dynamic_cast<image *>(dst_obj);
  367.  
  368.    validate_base(q, num_deps, deps);
  369.    validate_obj(q, src_img);
  370.    validate_obj(q, dst_img);
  371.  
  372.    hard_event *hev = new hard_event(
  373.       *q, CL_COMMAND_COPY_IMAGE, { deps, deps + num_deps },
  374.       hard_copy_op(q, dst_obj, dst_origin, src_obj, src_origin, region));
  375.  
  376.    ret_object(ev, hev);
  377.    return CL_SUCCESS;
  378.  
  379. } catch (error &e) {
  380.    return e.get();
  381. }
  382.  
  383. PUBLIC cl_int
  384. clEnqueueCopyImageToBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
  385.                            const size_t *src_origin, const size_t *region,
  386.                            size_t dst_offset,
  387.                            cl_uint num_deps, const cl_event *deps,
  388.                            cl_event *ev) try {
  389.    image *src_img = dynamic_cast<image *>(src_obj);
  390.  
  391.    validate_base(q, num_deps, deps);
  392.    validate_obj(q, src_img);
  393.    validate_obj(q, dst_obj);
  394.  
  395.    hard_event *hev = new hard_event(
  396.       *q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, { deps, deps + num_deps },
  397.       soft_copy_op(q,
  398.                    dst_obj, { dst_offset },
  399.                    { 0, 0, 0 },
  400.                    src_obj, src_origin,
  401.                    { 1, src_img->row_pitch(), src_img->slice_pitch() },
  402.                    region));
  403.  
  404.    ret_object(ev, hev);
  405.    return CL_SUCCESS;
  406.  
  407. } catch (error &e) {
  408.    return e.get();
  409. }
  410.  
  411. PUBLIC cl_int
  412. clEnqueueCopyBufferToImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
  413.                            size_t src_offset,
  414.                            const size_t *dst_origin, const size_t *region,
  415.                            cl_uint num_deps, const cl_event *deps,
  416.                            cl_event *ev) try {
  417.    image *dst_img = dynamic_cast<image *>(dst_obj);
  418.  
  419.    validate_base(q, num_deps, deps);
  420.    validate_obj(q, src_obj);
  421.    validate_obj(q, dst_img);
  422.  
  423.    hard_event *hev = new hard_event(
  424.       *q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, { deps, deps + num_deps },
  425.       soft_copy_op(q,
  426.                    dst_obj, dst_origin,
  427.                    { 1, dst_img->row_pitch(), dst_img->slice_pitch() },
  428.                    src_obj, { src_offset },
  429.                    { 0, 0, 0 },
  430.                    region));
  431.  
  432.    ret_object(ev, hev);
  433.    return CL_SUCCESS;
  434.  
  435. } catch (error &e) {
  436.    return e.get();
  437. }
  438.  
  439. PUBLIC void *
  440. clEnqueueMapBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
  441.                    cl_map_flags flags, size_t offset, size_t size,
  442.                    cl_uint num_deps, const cl_event *deps,
  443.                    cl_event *ev, cl_int *errcode_ret) try {
  444.    validate_base(q, num_deps, deps);
  445.    validate_obj(q, obj);
  446.  
  447.    if (offset > obj->size() || offset + size > obj->size())
  448.       throw error(CL_INVALID_VALUE);
  449.  
  450.    void *map = obj->resource(q).add_map(
  451.       *q, flags, blocking, { offset }, { size });
  452.  
  453.    ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_BUFFER,
  454.                                  { deps, deps + num_deps }));
  455.    ret_error(errcode_ret, CL_SUCCESS);
  456.    return map;
  457.  
  458. } catch (error &e) {
  459.    ret_error(errcode_ret, e);
  460.    return NULL;
  461. }
  462.  
  463. PUBLIC void *
  464. clEnqueueMapImage(cl_command_queue q, cl_mem obj, cl_bool blocking,
  465.                   cl_map_flags flags,
  466.                   const size_t *origin, const size_t *region,
  467.                   size_t *row_pitch, size_t *slice_pitch,
  468.                   cl_uint num_deps, const cl_event *deps,
  469.                   cl_event *ev, cl_int *errcode_ret) try {
  470.    image *img = dynamic_cast<image *>(obj);
  471.  
  472.    validate_base(q, num_deps, deps);
  473.    validate_obj(q, img);
  474.  
  475.    void *map = obj->resource(q).add_map(
  476.       *q, flags, blocking, origin, region);
  477.  
  478.    ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_IMAGE,
  479.                                  { deps, deps + num_deps }));
  480.    ret_error(errcode_ret, CL_SUCCESS);
  481.    return map;
  482.  
  483. } catch (error &e) {
  484.    ret_error(errcode_ret, e);
  485.    return NULL;
  486. }
  487.  
  488. PUBLIC cl_int
  489. clEnqueueUnmapMemObject(cl_command_queue q, cl_mem obj, void *ptr,
  490.                         cl_uint num_deps, const cl_event *deps,
  491.                         cl_event *ev) try {
  492.    validate_base(q, num_deps, deps);
  493.    validate_obj(q, obj);
  494.  
  495.    hard_event *hev = new hard_event(
  496.       *q, CL_COMMAND_UNMAP_MEM_OBJECT, { deps, deps + num_deps },
  497.       [=](event &) {
  498.          obj->resource(q).del_map(ptr);
  499.       });
  500.  
  501.    ret_object(ev, hev);
  502.    return CL_SUCCESS;
  503.  
  504. } catch (error &e) {
  505.    return e.get();
  506. }
  507.