//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include "api/util.hpp"
#include "core/kernel.hpp"
#include "core/event.hpp"

using namespace clover;

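///
/// Create a kernel object for the function called "name" in the first
/// binary of the given program.  Fails with CL_INVALID_KERNEL_NAME if the
/// program module contains no symbol with that name.
///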
PUBLIC cl_kernel
clCreateKernel(cl_program prog, const char *name,
               cl_int *errcode_ret) try {
   if (!prog)
      throw error(CL_INVALID_PROGRAM);

   if (!name)
      throw error(CL_INVALID_VALUE);

   if (prog->binaries().empty())
      throw error(CL_INVALID_PROGRAM_EXECUTABLE);

   auto sym = prog->binaries().begin()->second.sym(name);

   ret_error(errcode_ret, CL_SUCCESS);
   return new kernel(*prog, name, { sym.args.begin(), sym.args.end() });

} catch (module::noent_error &e) {
   ret_error(errcode_ret, CL_INVALID_KERNEL_NAME);
   return NULL;

} catch(error &e) {
   ret_error(errcode_ret, e);
   return NULL;
}

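///
/// Create one kernel object for each kernel function defined in the given
/// program.  If "kerns" is non-NULL it must provide room for at least as
/// many entries as there are symbols in the program's first binary,
/// otherwise CL_INVALID_VALUE is returned.
///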
PUBLIC cl_int
clCreateKernelsInProgram(cl_program prog, cl_uint count,
                         cl_kernel *kerns, cl_uint *count_ret) try {
   if (!prog)
      throw error(CL_INVALID_PROGRAM);

   if (prog->binaries().empty())
      throw error(CL_INVALID_PROGRAM_EXECUTABLE);

   auto &syms = prog->binaries().begin()->second.syms;

   if (kerns && count < syms.size())
      throw error(CL_INVALID_VALUE);

   if (kerns)
      std::transform(syms.begin(), syms.end(), kerns,
                     [=](const module::symbol &sym) {
                        return new kernel(*prog, compat::string(sym.name),
                                          { sym.args.begin(), sym.args.end() });
                     });

   if (count_ret)
      *count_ret = syms.size();

   return CL_SUCCESS;

} catch(error &e) {
   return e.get();
}

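///
/// Increment the reference count of the given kernel object.
///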
PUBLIC cl_int
clRetainKernel(cl_kernel kern) {
   if (!kern)
      return CL_INVALID_KERNEL;

   kern->retain();
   return CL_SUCCESS;
}

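///
/// Decrement the reference count of the given kernel object and destroy
/// it once the count drops to zero.
///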
PUBLIC cl_int
clReleaseKernel(cl_kernel kern) {
   if (!kern)
      return CL_INVALID_KERNEL;

   if (kern->release())
      delete kern;

   return CL_SUCCESS;
}

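///
/// Set argument "idx" of the given kernel from "size" bytes at "value".
///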
PUBLIC cl_int
clSetKernelArg(cl_kernel kern, cl_uint idx, size_t size,
               const void *value) try {
   if (!kern)
      throw error(CL_INVALID_KERNEL);

   if (idx >= kern->args.size())
      throw error(CL_INVALID_ARG_INDEX);

   kern->args[idx]->set(size, value);

   return CL_SUCCESS;

} catch(error &e) {
   return e.get();
}

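///
/// Query general properties of a kernel object: function name, argument
/// count, reference count, and the associated context and program.
///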
PUBLIC cl_int
clGetKernelInfo(cl_kernel kern, cl_kernel_info param,
                size_t size, void *buf, size_t *size_ret) {
   if (!kern)
      return CL_INVALID_KERNEL;

   switch (param) {
   case CL_KERNEL_FUNCTION_NAME:
      return string_property(buf, size, size_ret, kern->name());

   case CL_KERNEL_NUM_ARGS:
      return scalar_property<cl_uint>(buf, size, size_ret,
                                      kern->args.size());

   case CL_KERNEL_REFERENCE_COUNT:
      return scalar_property<cl_uint>(buf, size, size_ret,
                                      kern->ref_count());

   case CL_KERNEL_CONTEXT:
      return scalar_property<cl_context>(buf, size, size_ret,
                                         &kern->prog.ctx);

   case CL_KERNEL_PROGRAM:
      return scalar_property<cl_program>(buf, size, size_ret,
                                         &kern->prog);

   default:
      return CL_INVALID_VALUE;
   }
}

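///
/// Query device-specific execution properties of a kernel, such as the
/// maximum and compile-time work-group sizes and its local and private
/// memory usage on the given device.
///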
PUBLIC cl_int
clGetKernelWorkGroupInfo(cl_kernel kern, cl_device_id dev,
                         cl_kernel_work_group_info param,
                         size_t size, void *buf, size_t *size_ret) {
   if (!kern)
      return CL_INVALID_KERNEL;

   if ((!dev && kern->prog.binaries().size() != 1) ||
       (dev && !kern->prog.binaries().count(dev)))
      return CL_INVALID_DEVICE;

   switch (param) {
   case CL_KERNEL_WORK_GROUP_SIZE:
      return scalar_property<size_t>(buf, size, size_ret,
                                     kern->max_block_size());

   case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
      return vector_property<size_t>(buf, size, size_ret,
                                     kern->block_size());

   case CL_KERNEL_LOCAL_MEM_SIZE:
      return scalar_property<cl_ulong>(buf, size, size_ret,
                                       kern->mem_local());

   case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
      return scalar_property<size_t>(buf, size, size_ret, 1);

   case CL_KERNEL_PRIVATE_MEM_SIZE:
      return scalar_property<cl_ulong>(buf, size, size_ret,
                                       kern->mem_private());

   default:
      return CL_INVALID_VALUE;
   }
}

namespace {
   ///
   /// Common argument checking shared by kernel invocation commands.
   ///
   void
   kernel_validate(cl_command_queue q, cl_kernel kern,
                   cl_uint dims, const size_t *grid_offset,
                   const size_t *grid_size, const size_t *block_size,
                   cl_uint num_deps, const cl_event *deps,
                   cl_event *ev) {
      if (!q)
         throw error(CL_INVALID_COMMAND_QUEUE);

      if (!kern)
         throw error(CL_INVALID_KERNEL);

      if (&kern->prog.ctx != &q->ctx ||
          any_of([&](const cl_event ev) {
                return &ev->ctx != &q->ctx;
             }, deps, deps + num_deps))
         throw error(CL_INVALID_CONTEXT);

      if (bool(num_deps) != bool(deps) ||
          any_of(is_zero<cl_event>, deps, deps + num_deps))
         throw error(CL_INVALID_EVENT_WAIT_LIST);

      if (any_of([](std::unique_ptr<kernel::argument> &arg) {
               return !arg->set();
            }, kern->args.begin(), kern->args.end()))
         throw error(CL_INVALID_KERNEL_ARGS);

      if (!kern->prog.binaries().count(&q->dev))
         throw error(CL_INVALID_PROGRAM_EXECUTABLE);

      if (dims < 1 || dims > q->dev.max_block_size().size())
         throw error(CL_INVALID_WORK_DIMENSION);

      if (!grid_size || any_of(is_zero<size_t>, grid_size, grid_size + dims))
         throw error(CL_INVALID_GLOBAL_WORK_SIZE);

      if (block_size) {
         if (any_of([](size_t b, size_t max) {
                  return b == 0 || b > max;
               }, block_size, block_size + dims,
               q->dev.max_block_size().begin()))
            throw error(CL_INVALID_WORK_ITEM_SIZE);

         if (any_of([](size_t b, size_t g) {
                  return g % b;
               }, block_size, block_size + dims, grid_size))
            throw error(CL_INVALID_WORK_GROUP_SIZE);

         if (fold(std::multiplies<size_t>(), 1u,
                  block_size, block_size + dims) >
             q->dev.max_threads_per_block())
            throw error(CL_INVALID_WORK_GROUP_SIZE);
      }
   }

   ///
   /// Common event action shared by kernel invocation commands.
   ///
   std::function<void (event &)>
   kernel_op(cl_command_queue q, cl_kernel kern,
             const std::vector<size_t> &grid_offset,
             const std::vector<size_t> &grid_size,
             const std::vector<size_t> &block_size) {
      // kernel::launch() takes the grid size in blocks rather than
      // work-items, so divide the global size by the block size in each
      // dimension (validation guarantees the division is exact).
      const std::vector<size_t> reduced_grid_size = map(
         std::divides<size_t>(), grid_size.begin(), grid_size.end(),
         block_size.begin());

      return [=](event &) {
         kern->launch(*q, grid_offset, reduced_grid_size, block_size);
      };
   }

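   ///
   /// Return the first n elements of p as a vector, or n copies of x if
   /// p is NULL.
   ///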
   std::vector<size_t>
   opt_vector(const size_t *p, unsigned n, size_t x) {
      if (p)
         return { p, p + n };
      else
         return std::vector<size_t>(n, x);
   }
}

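///
/// Enqueue execution of a kernel over a dims-dimensional grid.  The grid
/// offset and block size default to 0 and 1 respectively in each
/// dimension when the corresponding pointer is NULL.
///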
PUBLIC cl_int
clEnqueueNDRangeKernel(cl_command_queue q, cl_kernel kern,
                       cl_uint dims, const size_t *pgrid_offset,
                       const size_t *pgrid_size, const size_t *pblock_size,
                       cl_uint num_deps, const cl_event *deps,
                       cl_event *ev) try {
   auto grid_offset = opt_vector(pgrid_offset, dims, 0);
   auto grid_size = opt_vector(pgrid_size, dims, 1);
   auto block_size = opt_vector(pblock_size, dims, 1);

   kernel_validate(q, kern, dims, pgrid_offset, pgrid_size, pblock_size,
                   num_deps, deps, ev);

   hard_event *hev = new hard_event(
      *q, CL_COMMAND_NDRANGE_KERNEL, { deps, deps + num_deps },
      kernel_op(q, kern, grid_offset, grid_size, block_size));

   ret_object(ev, hev);
   return CL_SUCCESS;

} catch(error &e) {
   return e.get();
}

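///
/// Enqueue execution of a kernel as a single work-item, equivalent to a
/// one-dimensional NDRange with global and local size 1.
///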
PUBLIC cl_int
clEnqueueTask(cl_command_queue q, cl_kernel kern,
              cl_uint num_deps, const cl_event *deps,
              cl_event *ev) try {
   const std::vector<size_t> grid_offset = { 0 };
   const std::vector<size_t> grid_size = { 1 };
   const std::vector<size_t> block_size = { 1 };

   kernel_validate(q, kern, 1, grid_offset.data(), grid_size.data(),
                   block_size.data(), num_deps, deps, ev);

   hard_event *hev = new hard_event(
      *q, CL_COMMAND_TASK, { deps, deps + num_deps },
      kernel_op(q, kern, grid_offset, grid_size, block_size));

   ret_object(ev, hev);
   return CL_SUCCESS;

} catch(error &e) {
   return e.get();
}

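///
/// Native (host-function) kernels are not supported by this
/// implementation, so the call always fails with CL_INVALID_OPERATION.
///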
PUBLIC cl_int
clEnqueueNativeKernel(cl_command_queue q, void (*func)(void *),
                      void *args, size_t args_size,
                      cl_uint obj_count, const cl_mem *obj_list,
                      const void **obj_args, cl_uint num_deps,
                      const cl_event *deps, cl_event *ev) {
   return CL_INVALID_OPERATION;
}

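//
// Illustrative sketch (not part of the implementation): how a host
// application would typically drive the entry points above.  The kernel
// name "add" and the prog/queue/buf objects are hypothetical placeholders
// created elsewhere with the usual OpenCL calls.
//
//    cl_int err;
//    cl_kernel kern = clCreateKernel(prog, "add", &err);
//
//    err = clSetKernelArg(kern, 0, sizeof(cl_mem), &buf);
//
//    size_t grid_size = 1024, block_size = 64;
//    err = clEnqueueNDRangeKernel(queue, kern, 1, NULL,
//                                 &grid_size, &block_size,
//                                 0, NULL, NULL);
//
//    clReleaseKernel(kern);
//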