//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include "api/util.hpp"
#include "core/kernel.hpp"
#include "core/event.hpp"

using namespace clover;

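///
/// Create a kernel object for the program symbol \a name.  Fails with
/// CL_INVALID_KERNEL_NAME if the program exposes no symbol of that name
/// (find() throws std::out_of_range in that case).
///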
CLOVER_API cl_kernel
clCreateKernel(cl_program d_prog, const char *name, cl_int *r_errcode) try {
   auto &prog = obj(d_prog);

   if (!name)
      throw error(CL_INVALID_VALUE);

   auto &sym = find(name_equals(name), prog.symbols());

   ret_error(r_errcode, CL_SUCCESS);
   return new kernel(prog, name, range(sym.args));

} catch (std::out_of_range &e) {
   ret_error(r_errcode, CL_INVALID_KERNEL_NAME);
   return NULL;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

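///
/// Create one kernel object per symbol in the program.  \a count is
/// only validated when \a rd_kerns is non-NULL, so the required size
/// can be queried first by passing a NULL \a rd_kerns and reading
/// \a r_count.
///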
CLOVER_API cl_int
clCreateKernelsInProgram(cl_program d_prog, cl_uint count,
                         cl_kernel *rd_kerns, cl_uint *r_count) try {
   auto &prog = obj(d_prog);
   auto &syms = prog.symbols();

   if (rd_kerns && count < syms.size())
      throw error(CL_INVALID_VALUE);

   if (rd_kerns)
      copy(map([&](const module::symbol &sym) {
               return desc(new kernel(prog,
                                      std::string(sym.name.begin(),
                                                  sym.name.end()),
                                      range(sym.args)));
            }, syms),
         rd_kerns);

   if (r_count)
      *r_count = syms.size();

   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

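///
/// Increment the reference count of the kernel object.
///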
CLOVER_API cl_int
clRetainKernel(cl_kernel d_kern) try {
   obj(d_kern).retain();
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

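///
/// Decrement the reference count of the kernel object and destroy it
/// once the count drops to zero.
///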
CLOVER_API cl_int
clReleaseKernel(cl_kernel d_kern) try {
   if (obj(d_kern).release())
      delete pobj(d_kern);

   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

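///
/// Bind a value to argument \a idx of the kernel.  An out-of-bounds
/// index makes at() throw std::out_of_range, which is mapped to
/// CL_INVALID_ARG_INDEX below.  A typical call from client code
/// (hypothetical names) would be:
///
///    cl_mem buf = ...;
///    clSetKernelArg(kern, 0, sizeof(buf), &buf);
///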
CLOVER_API cl_int
clSetKernelArg(cl_kernel d_kern, cl_uint idx, size_t size,
               const void *value) try {
   obj(d_kern).args().at(idx).set(size, value);
   return CL_SUCCESS;

} catch (std::out_of_range &e) {
   return CL_INVALID_ARG_INDEX;

} catch (error &e) {
   return e.get();
}

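///
/// Answer the standard kernel object queries (function name, argument
/// count, reference count, associated context and program) through the
/// generic property_buffer helper.
///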
CLOVER_API cl_int
clGetKernelInfo(cl_kernel d_kern, cl_kernel_info param,
                size_t size, void *r_buf, size_t *r_size) try {
   property_buffer buf { r_buf, size, r_size };
   auto &kern = obj(d_kern);

   switch (param) {
   case CL_KERNEL_FUNCTION_NAME:
      buf.as_string() = kern.name();
      break;

   case CL_KERNEL_NUM_ARGS:
      buf.as_scalar<cl_uint>() = kern.args().size();
      break;

   case CL_KERNEL_REFERENCE_COUNT:
      buf.as_scalar<cl_uint>() = kern.ref_count();
      break;

   case CL_KERNEL_CONTEXT:
      buf.as_scalar<cl_context>() = desc(kern.program().context());
      break;

   case CL_KERNEL_PROGRAM:
      buf.as_scalar<cl_program>() = desc(kern.program());
      break;

   default:
      throw error(CL_INVALID_VALUE);
   }

   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

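///
/// Answer the per-device work-group queries.  \a d_dev may be NULL if
/// the program is associated with a single device (unique() throws
/// otherwise); in any case it has to be one of the devices the program
/// was built for.
///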
CLOVER_API cl_int
clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,
                         cl_kernel_work_group_info param,
                         size_t size, void *r_buf, size_t *r_size) try {
   property_buffer buf { r_buf, size, r_size };
   auto &kern = obj(d_kern);
   auto &dev = (d_dev ? *pobj(d_dev) : unique(kern.program().devices()));

   if (!count(dev, kern.program().devices()))
      throw error(CL_INVALID_DEVICE);

   switch (param) {
   case CL_KERNEL_WORK_GROUP_SIZE:
      buf.as_scalar<size_t>() = dev.max_threads_per_block();
      break;

   case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
      buf.as_vector<size_t>() = kern.required_block_size();
      break;

   case CL_KERNEL_LOCAL_MEM_SIZE:
      buf.as_scalar<cl_ulong>() = kern.mem_local();
      break;

   case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
      buf.as_scalar<size_t>() = 1;
      break;

   case CL_KERNEL_PRIVATE_MEM_SIZE:
      buf.as_scalar<cl_ulong>() = kern.mem_private();
      break;

   default:
      throw error(CL_INVALID_VALUE);
   }

   return CL_SUCCESS;

} catch (error &e) {
   return e.get();

} catch (std::out_of_range &e) {
   return CL_INVALID_DEVICE;
}

namespace {
   ///
   /// Common argument checking shared by kernel invocation commands.
   ///
   void
   validate_common(const command_queue &q, kernel &kern,
                   const ref_vector<event> &deps) {
      if (kern.program().context() != q.context() ||
          any_of([&](const event &ev) {
                return ev.context() != q.context();
             }, deps))
         throw error(CL_INVALID_CONTEXT);

      if (any_of([](kernel::argument &arg) {
               return !arg.set();
            }, kern.args()))
         throw error(CL_INVALID_KERNEL_ARGS);

      if (!count(q.device(), kern.program().devices()))
         throw error(CL_INVALID_PROGRAM_EXECUTABLE);
   }

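   ///
   /// Validate the work dimensions and global work size of a kernel
   /// invocation command.
   ///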
   std::vector<size_t>
   validate_grid_size(const command_queue &q, cl_uint dims,
                      const size_t *d_grid_size) {
      auto grid_size = range(d_grid_size, dims);

      if (dims < 1 || dims > q.device().max_block_size().size())
         throw error(CL_INVALID_WORK_DIMENSION);

      if (!d_grid_size || any_of(is_zero(), grid_size))
         throw error(CL_INVALID_GLOBAL_WORK_SIZE);

      return grid_size;
   }

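   ///
   /// Validate the global work offset, defaulting to the origin if
   /// none was specified.
   ///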
   std::vector<size_t>
   validate_grid_offset(const command_queue &q, cl_uint dims,
                        const size_t *d_grid_offset) {
      if (d_grid_offset)
         return range(d_grid_offset, dims);
      else
         return std::vector<size_t>(dims, 0);
   }

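   ///
   /// Validate the specified local work size against the device limits
   /// and the global work size, or pick a suitable one if the user
   /// didn't specify any.
   ///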
   std::vector<size_t>
   validate_block_size(const command_queue &q, const kernel &kern,
                       cl_uint dims, const size_t *d_grid_size,
                       const size_t *d_block_size) {
      auto grid_size = range(d_grid_size, dims);

      if (d_block_size) {
         auto block_size = range(d_block_size, dims);

         if (any_of(is_zero(), block_size) ||
             any_of(greater(), block_size, q.device().max_block_size()))
            throw error(CL_INVALID_WORK_ITEM_SIZE);

         if (any_of(modulus(), grid_size, block_size))
            throw error(CL_INVALID_WORK_GROUP_SIZE);

         if (fold(multiplies(), 1u, block_size) >
             q.device().max_threads_per_block())
            throw error(CL_INVALID_WORK_GROUP_SIZE);

         return block_size;

      } else {
         return kern.optimal_block_size(q, grid_size);
      }
   }
}

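///
/// Validate the execution parameters and queue an event that launches
/// the kernel over the requested grid.  A minimal sketch of a client
/// call (hypothetical names), assuming a valid queue and kernel:
///
///    size_t grid[2] = { 1024, 1024 }, block[2] = { 16, 16 };
///    clEnqueueNDRangeKernel(q, kern, 2, NULL, grid, block,
///                           0, NULL, NULL);
///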
CLOVER_API cl_int
clEnqueueNDRangeKernel(cl_command_queue d_q, cl_kernel d_kern,
                       cl_uint dims, const size_t *d_grid_offset,
                       const size_t *d_grid_size, const size_t *d_block_size,
                       cl_uint num_deps, const cl_event *d_deps,
                       cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &kern = obj(d_kern);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto grid_size = validate_grid_size(q, dims, d_grid_size);
   auto grid_offset = validate_grid_offset(q, dims, d_grid_offset);
   auto block_size = validate_block_size(q, kern, dims,
                                         d_grid_size, d_block_size);

   validate_common(q, kern, deps);

   auto hev = create<hard_event>(
      q, CL_COMMAND_NDRANGE_KERNEL, deps,
      [=, &kern, &q](event &) {
         kern.launch(q, grid_offset, grid_size, block_size);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

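///
/// Equivalent to enqueuing a one-dimensional kernel invocation with a
/// global and local work size of one.
///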
CLOVER_API cl_int
clEnqueueTask(cl_command_queue d_q, cl_kernel d_kern,
              cl_uint num_deps, const cl_event *d_deps,
              cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &kern = obj(d_kern);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, kern, deps);

   auto hev = create<hard_event>(
      q, CL_COMMAND_TASK, deps,
      [=, &kern, &q](event &) {
         kern.launch(q, { 0 }, { 1 }, { 1 });
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

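///
/// Native kernels aren't supported, so this unconditionally fails with
/// CL_INVALID_OPERATION.
///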
CLOVER_API cl_int
clEnqueueNativeKernel(cl_command_queue d_q, void (*func)(void *),
                      void *args, size_t args_size,
                      cl_uint num_mems, const cl_mem *d_mems,
                      const void **mem_handles, cl_uint num_deps,
                      const cl_event *d_deps, cl_event *rd_ev) {
   return CL_INVALID_OPERATION;
}