Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. #! /usr/bin/env python
  2. #
  3. # Copyright (C) 2014 Connor Abbott
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a
  6. # copy of this software and associated documentation files (the "Software"),
  7. # to deal in the Software without restriction, including without limitation
  8. # the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. # and/or sell copies of the Software, and to permit persons to whom the
  10. # Software is furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice (including the next
  13. # paragraph) shall be included in all copies or substantial portions of the
  14. # Software.
  15. #
  16. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19. # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21. # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22. # IN THE SOFTWARE.
  23. #
  24. # Authors:
  25. #    Connor Abbott (cwabbott0@gmail.com)
  26.  
  27.  
  28. # Class that represents all the information we have about the opcode
  29. # NOTE: this must be kept in sync with nir_op_info
  30.  
  31. class Opcode(object):
  32.    """Class that represents all the information we have about the opcode
  33.   NOTE: this must be kept in sync with nir_op_info
  34.   """
  35.    def __init__(self, name, output_size, output_type, input_sizes,
  36.                 input_types, algebraic_properties, const_expr):
  37.       """Parameters:
  38.  
  39.      - name is the name of the opcode (prepend nir_op_ for the enum name)
  40.      - all types are strings that get nir_type_ prepended to them
  41.      - input_types is a list of types
  42.      - algebraic_properties is a space-seperated string, where nir_op_is_ is
  43.        prepended before each entry
  44.      - const_expr is an expression or series of statements that computes the
  45.        constant value of the opcode given the constant values of its inputs.
  46.  
  47.      Constant expressions are formed from the variables src0, src1, ...,
  48.      src(N-1), where N is the number of arguments.  The output of the
  49.      expression should be stored in the dst variable.  Per-component input
  50.      and output variables will be scalars and non-per-component input and
  51.      output variables will be a struct with fields named x, y, z, and w
  52.      all of the correct type.  Input and output variables can be assumed
  53.      to already be of the correct type and need no conversion.  In
  54.      particular, the conversion from the C bool type to/from  NIR_TRUE and
  55.      NIR_FALSE happens automatically.
  56.  
  57.      For per-component instructions, the entire expression will be
  58.      executed once for each component.  For non-per-component
  59.      instructions, the expression is expected to store the correct values
  60.      in dst.x, dst.y, etc.  If "dst" does not exist anywhere in the
  61.      constant expression, an assignment to dst will happen automatically
  62.      and the result will be equivalent to "dst = <expression>" for
  63.      per-component instructions and "dst.x = dst.y = ... = <expression>"
  64.      for non-per-component instructions.
  65.      """
  66.       assert isinstance(name, str)
  67.       assert isinstance(output_size, int)
  68.       assert isinstance(output_type, str)
  69.       assert isinstance(input_sizes, list)
  70.       assert isinstance(input_sizes[0], int)
  71.       assert isinstance(input_types, list)
  72.       assert isinstance(input_types[0], str)
  73.       assert isinstance(algebraic_properties, str)
  74.       assert isinstance(const_expr, str)
  75.       assert len(input_sizes) == len(input_types)
  76.       assert 0 <= output_size <= 4
  77.       for size in input_sizes:
  78.          assert 0 <= size <= 4
  79.          if output_size != 0:
  80.             assert size != 0
  81.       self.name = name
  82.       self.num_inputs = len(input_sizes)
  83.       self.output_size = output_size
  84.       self.output_type = output_type
  85.       self.input_sizes = input_sizes
  86.       self.input_types = input_types
  87.       self.algebraic_properties = algebraic_properties
  88.       self.const_expr = const_expr
  89.  
# helper variables for strings
# Type-name strings passed as the input/output types of the opcodes below.
tfloat = "float"
tint = "int"
tbool = "bool"
tunsigned = "unsigned"

# Algebraic property strings.  Each one ends with a space so that several of
# them can be concatenated with "+" into one space-separated string.
commutative = "commutative "
associative = "associative "

# global dictionary of opcodes
# Maps opcode name -> Opcode instance; filled in by opcode().
opcodes = {}
  101.  
  102. def opcode(name, output_size, output_type, input_sizes, input_types,
  103.            algebraic_properties, const_expr):
  104.    assert name not in opcodes
  105.    opcodes[name] = Opcode(name, output_size, output_type, input_sizes,
  106.                           input_types, algebraic_properties, const_expr)
  107.  
  108. def unop_convert(name, in_type, out_type, const_expr):
  109.    opcode(name, 0, out_type, [0], [in_type], "", const_expr)
  110.  
  111. def unop(name, ty, const_expr):
  112.    opcode(name, 0, ty, [0], [ty], "", const_expr)
  113.  
  114. def unop_horiz(name, output_size, output_type, input_size, input_type,
  115.                const_expr):
  116.    opcode(name, output_size, output_type, [input_size], [input_type], "",
  117.           const_expr)
  118.  
  119. def unop_reduce(name, output_size, output_type, input_type, prereduce_expr,
  120.                 reduce_expr, final_expr):
  121.    def prereduce(src):
  122.       return "(" + prereduce_expr.format(src=src) + ")"
  123.    def final(src):
  124.       return final_expr.format(src="(" + src + ")")
  125.    def reduce_(src0, src1):
  126.       return reduce_expr.format(src0=src0, src1=src1)
  127.    src0 = prereduce("src0.x")
  128.    src1 = prereduce("src0.y")
  129.    src2 = prereduce("src0.z")
  130.    src3 = prereduce("src0.w")
  131.    unop_horiz(name + "2", output_size, output_type, 2, input_type,
  132.               final(reduce_(src0, src1)))
  133.    unop_horiz(name + "3", output_size, output_type, 3, input_type,
  134.               final(reduce_(reduce_(src0, src1), src2)))
  135.    unop_horiz(name + "4", output_size, output_type, 4, input_type,
  136.               final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
  137.  
  138.  
# These two move instructions differ in what modifiers they support and what
# the negate modifier means. Otherwise, they are identical.
unop("fmov", tfloat, "src0")
unop("imov", tint, "src0")

unop("ineg", tint, "-src0")
unop("fneg", tfloat, "-src0")
unop("inot", tint, "~src0") # invert every bit of the integer
# float logical not: 0.0 becomes 1.0, everything else becomes 0.0
unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f")
# sign of the input as 0, 1 or -1
unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)")
unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
unop("fabs", tfloat, "fabsf(src0)")
# values above 1.0 become 1.0, values at or below 0.0 become 0.0
unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)")
unop("frcp", tfloat, "1.0f / src0")
unop("frsq", tfloat, "1.0f / sqrtf(src0)")
unop("fsqrt", tfloat, "sqrtf(src0)")
unop("fexp2", tfloat, "exp2f(src0)")
unop("flog2", tfloat, "log2f(src0)")
unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion
unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
# Float-to-boolean conversion
unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
# Boolean-to-float conversion
unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion
unop_convert("i2b", tint, tbool, "src0 != 0")
unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion.

# Horizontal any/all reductions over 2, 3 and 4 components.  The b* variants
# take and return booleans; the f* variants treat a non-zero float as true
# and return 1.0 or 0.0.
unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}")
unop_reduce("ball", 1, tbool, tbool, "{src}", "{src0} && {src1}", "{src}")
unop_reduce("fany", 1, tfloat, tfloat, "{src} != 0.0f", "{src0} || {src1}",
            "{src} ? 1.0f : 0.0f")
unop_reduce("fall", 1, tfloat, tfloat, "{src} != 0.0f", "{src0} && {src1}",
            "{src} ? 1.0f : 0.0f")

# Unary floating-point rounding operations.


unop("ftrunc", tfloat, "truncf(src0)")
unop("fceil", tfloat, "ceilf(src0)")
unop("ffloor", tfloat, "floorf(src0)")
unop("ffract", tfloat, "src0 - floorf(src0)")
unop("fround_even", tfloat, "_mesa_roundevenf(src0)")


# Trigonometric operations.


unop("fsin", tfloat, "sinf(src0)")
unop("fcos", tfloat, "cosf(src0)")


# Partial derivatives.


unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
unop("fddy", tfloat, "0.0f")
unop("fddx_fine", tfloat, "0.0f")
unop("fddy_fine", tfloat, "0.0f")
unop("fddx_coarse", tfloat, "0.0f")
unop("fddy_coarse", tfloat, "0.0f")


# Floating point pack and unpack operations.
  203.  
  204.  
  205. # Floating point pack and unpack operations.
  206.  
  207. def pack_2x16(fmt):
  208.    unop_horiz("pack_" + fmt + "_2x16", 1, tunsigned, 2, tfloat, """
  209. dst.x = (uint32_t) pack_fmt_1x16(src0.x);
  210. dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
  211. """.replace("fmt", fmt))
  212.  
  213. def pack_4x8(fmt):
  214.    unop_horiz("pack_" + fmt + "_4x8", 1, tunsigned, 4, tfloat, """
  215. dst.x = (uint32_t) pack_fmt_1x8(src0.x);
  216. dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
  217. dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
  218. dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
  219. """.replace("fmt", fmt))
  220.  
  221. def unpack_2x16(fmt):
  222.    unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tunsigned, """
  223. dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
  224. dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16));
  225. """.replace("fmt", fmt))
  226.  
  227. def unpack_4x8(fmt):
  228.    unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tunsigned, """
  229. dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
  230. dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
  231. dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
  232. dst.w = unpack_fmt_1x8((uint8_t)(src0.x >> 24));
  233. """.replace("fmt", fmt))
  234.  
  235.  
# Instantiate the pack/unpack opcodes for each format.  The "half" format
# only gets the 2x16 variants.
pack_2x16("snorm")
pack_4x8("snorm")
pack_2x16("unorm")
pack_4x8("unorm")
pack_2x16("half")
unpack_2x16("snorm")
unpack_4x8("snorm")
unpack_2x16("unorm")
unpack_4x8("unorm")
unpack_2x16("half")


# Lowered floating point unpacking operations.
# split_x unpacks the low 16 bits of the input, split_y the high 16 bits.


unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tunsigned,
           "unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tunsigned,
           "unpack_half_1x16((uint16_t)(src0.x >> 16))")
  255.  
  256.  
  257. # Bit operations, part of ARB_gpu_shader5.
  258.  
  259.  
# Reverses the order of the 32 bits: bit N of the input ends up in bit 31 - N.
unop("bitfield_reverse", tunsigned, """
/* we're not winning any awards for speed here, but that's ok */
dst = 0;
for (unsigned bit = 0; bit < 32; bit++)
  dst |= ((src0 >> bit) & 1) << (31 - bit);
""")
# Counts the number of bits set to 1 in the 32-bit input.
unop("bit_count", tunsigned, """
dst = 0;
for (unsigned bit = 0; bit < 32; bit++) {
  if ((src0 >> bit) & 1)
     dst++;
}
""")
  273.  
  274. unop_convert("ufind_msb", tunsigned, tint, """
  275. dst = -1;
  276. for (int bit = 31; bit > 0; bit--) {
  277.   if ((src0 >> bit) & 1) {
  278.      dst = bit;
  279.      break;
  280.   }
  281. }
  282. """)
  283.  
# Signed variant of find-msb: for negative inputs the most significant 0 bit
# is reported, for non-negative inputs the most significant 1 bit.  The
# result is -1 when no such bit exists.
unop("ifind_msb", tint, """
dst = -1;
for (int bit = 31; bit >= 0; bit--) {
  /* If src0 < 0, we're looking for the first 0 bit.
   * if src0 >= 0, we're looking for the first 1 bit.
   */
  if ((((src0 >> bit) & 1) && (src0 >= 0)) ||
     (!((src0 >> bit) & 1) && (src0 < 0))) {
     dst = bit;
     break;
  }
}
""")

# Index of the least significant 1 bit, or -1 if no bit is set.
unop("find_lsb", tint, """
dst = -1;
for (unsigned bit = 0; bit < 32; bit++) {
  if ((src0 >> bit) & 1) {
     dst = bit;
     break;
  }
}
""")
  307.  
  308.  
  309. for i in xrange(1, 5):
  310.    for j in xrange(1, 5):
  311.       unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f")
  312.  
  313. def binop_convert(name, out_type, in_type, alg_props, const_expr):
  314.    opcode(name, 0, out_type, [0, 0], [in_type, in_type], alg_props, const_expr)
  315.  
  316. def binop(name, ty, alg_props, const_expr):
  317.    binop_convert(name, ty, ty, alg_props, const_expr)
  318.  
  319. def binop_compare(name, ty, alg_props, const_expr):
  320.    binop_convert(name, tbool, ty, alg_props, const_expr)
  321.  
  322. def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size,
  323.                 src2_type, const_expr):
  324.    opcode(name, out_size, out_type, [src1_size, src2_size], [src1_type, src2_type],
  325.           "", const_expr)
  326.  
  327. def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
  328.                  reduce_expr, final_expr):
  329.    def final(src):
  330.       return final_expr.format(src= "(" + src + ")")
  331.    def reduce_(src0, src1):
  332.       return reduce_expr.format(src0=src0, src1=src1)
  333.    def prereduce(src0, src1):
  334.       return "(" + prereduce_expr.format(src0=src0, src1=src1) + ")"
  335.    src0 = prereduce("src0.x", "src1.x")
  336.    src1 = prereduce("src0.y", "src1.y")
  337.    src2 = prereduce("src0.z", "src1.z")
  338.    src3 = prereduce("src0.w", "src1.w")
  339.    opcode(name + "2", output_size, output_type,
  340.           [2, 2], [src_type, src_type], commutative,
  341.           final(reduce_(src0, src1)))
  342.    opcode(name + "3", output_size, output_type,
  343.           [3, 3], [src_type, src_type], commutative,
  344.           final(reduce_(reduce_(src0, src1), src2)))
  345.    opcode(name + "4", output_size, output_type,
  346.           [4, 4], [src_type, src_type], commutative,
  347.           final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
  348.  
  349. binop("fadd", tfloat, commutative + associative, "src0 + src1")
  350. binop("iadd", tint, commutative + associative, "src0 + src1")
  351. binop("fsub", tfloat, "", "src0 - src1")
  352. binop("isub", tint, "", "src0 - src1")
  353.  
  354. binop("fmul", tfloat, commutative + associative, "src0 * src1")
  355. # low 32-bits of signed/unsigned integer multiply
  356. binop("imul", tint, commutative + associative, "src0 * src1")
  357. # high 32-bits of signed integer multiply
  358. binop("imul_high", tint, commutative,
  359.       "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
  360. # high 32-bits of unsigned integer multiply
  361. binop("umul_high", tunsigned, commutative,
  362.       "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
  363.  
  364. binop("fdiv", tfloat, "", "src0 / src1")
  365. binop("idiv", tint, "", "src0 / src1")
  366. binop("udiv", tunsigned, "", "src0 / src1")
  367.  
  368. # returns a boolean representing the carry resulting from the addition of
  369. # the two unsigned arguments.
  370.  
  371. binop_convert("uadd_carry", tbool, tunsigned, commutative, "src0 + src1 < src0")
  372.  
  373. # returns a boolean representing the borrow resulting from the subtraction
  374. # of the two unsigned arguments.
  375.  
  376. binop_convert("usub_borrow", tbool, tunsigned, "", "src1 < src0")
  377.  
  378. binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
  379. binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1")
  380.  
#
# Comparisons
#


# these integer-aware comparisons return a boolean (0 or ~0)

binop_compare("flt", tfloat, "", "src0 < src1")
binop_compare("fge", tfloat, "", "src0 >= src1")
binop_compare("feq", tfloat, commutative, "src0 == src1")
binop_compare("fne", tfloat, commutative, "src0 != src1")
binop_compare("ilt", tint, "", "src0 < src1")
binop_compare("ige", tint, "", "src0 >= src1")
binop_compare("ieq", tint, commutative, "src0 == src1")
binop_compare("ine", tint, commutative, "src0 != src1")
binop_compare("ult", tunsigned, "", "src0 < src1")
binop_compare("uge", tunsigned, "", "src0 >= src1")

# integer-aware GLSL-style comparisons that compare floats and ints
# (each is registered in 2-, 3- and 4-component variants)

binop_reduce("ball_fequal",  1, tbool, tfloat, "{src0} == {src1}",
             "{src0} && {src1}", "{src}")
binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}",
             "{src0} || {src1}", "{src}")
binop_reduce("ball_iequal",  1, tbool, tint, "{src0} == {src1}",
             "{src0} && {src1}", "{src}")
binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
             "{src0} || {src1}", "{src}")

# non-integer-aware GLSL-style comparisons that return 0.0 or 1.0

binop_reduce("fall_equal",  1, tfloat, tfloat, "{src0} == {src1}",
             "{src0} && {src1}", "{src} ? 1.0f : 0.0f")
binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}",
             "{src0} || {src1}", "{src} ? 1.0f : 0.0f")

# These comparisons for integer-less hardware return 1.0 and 0.0 for true
# and false respectively

binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal


# Shifts: ishr shifts a signed value, ushr an unsigned one.
binop("ishl", tint, "", "src0 << src1")
binop("ishr", tint, "", "src0 >> src1")
binop("ushr", tunsigned, "", "src0 >> src1")

# bitwise logic operators
#
# These are also used as boolean and, or, xor for hardware supporting
# integers.


binop("iand", tunsigned, commutative + associative, "src0 & src1")
binop("ior", tunsigned, commutative + associative, "src0 | src1")
binop("ixor", tunsigned, commutative + associative, "src0 ^ src1")


# floating point logic operators
#
# These use (src != 0.0) for testing the truth of the input, and output 1.0
# for true and 0.0 for false

binop("fand", tfloat, commutative,
      "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
binop("for", tfloat, commutative,
      "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
binop("fxor", tfloat, commutative,
      "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")

# Dot product: component-wise multiply, then sum (fdot2, fdot3, fdot4).
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
             "{src}")

binop("fmin", tfloat, "", "fminf(src0, src1)")
binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
binop("fmax", tfloat, "", "fmaxf(src0, src1)")
binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0")

binop("fpow", tfloat, "", "powf(src0, src1)")

# Packs src0.x into the low 16 bits and src1.x into the high 16 bits.
binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat,
            "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
  467.  
  468. binop_convert("bfm", tunsigned, tint, "", """
  469. int offset = src0, bits = src1;
  470. if (offset < 0 || bits < 0 || offset + bits > 32)
  471.   dst = 0; /* undefined per the spec */
  472. else
  473.   dst = ((1 << bits)- 1) << offset;
  474. """)
  475.  
# ldexp takes a float and an int and returns a float.  Results that are not
# normal numbers are replaced by a zero carrying the sign of src0.
opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """
dst = ldexp(src0, src1);
/* flush denormals to zero. */
if (!isnormal(dst))
  dst = copysign(0.0f, src0);
""")

# Combines the first component of each input to make a 2-component vector.

binop_horiz("vec2", 2, tunsigned, 1, tunsigned, 1, tunsigned, """
dst.x = src0.x;
dst.y = src1.x;
""")
  489.  
  490. def triop(name, ty, const_expr):
  491.    opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr)
  492. def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
  493.    opcode(name, output_size, tunsigned,
  494.    [src1_size, src2_size, src3_size],
  495.    [tunsigned, tunsigned, tunsigned], "", const_expr)
  496.  
# Fused multiply-add: src0 * src1 + src2.
triop("ffma", tfloat, "src0 * src1 + src2")

# Linear interpolation between src0 and src1 with weight src2.
triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")

# Conditional Select
#
# A vector conditional select instruction (like ?:, but operating per-
# component on vectors). There are two versions, one for floating point
# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).


triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
opcode("bcsel", 0, tunsigned, [0, 0, 0],
      [tbool, tunsigned, tunsigned], "", "src0 ? src1 : src2")
  511.  
  512. triop("bfi", tunsigned, """
  513. unsigned mask = src0, insert = src1 & mask, base = src2;
  514. if (mask == 0) {
  515.   dst = base;
  516. } else {
  517.   unsigned tmp = mask;
  518.   while (!(tmp & 1)) {
  519.      tmp >>= 1;
  520.      insert <<= 1;
  521.   }
  522.   dst = (base & ~mask) | insert;
  523. }
  524. """)
  525.  
  526. opcode("ubitfield_extract", 0, tunsigned,
  527.        [0, 1, 1], [tunsigned, tint, tint], "", """
  528. unsigned base = src0;
  529. int offset = src1.x, bits = src2.x;
  530. if (bits == 0) {
  531.   dst = 0;
  532. } else if (bits < 0 || offset < 0 || offset + bits > 32) {
  533.   dst = 0; /* undefined per the spec */
  534. } else {
  535.   dst = (base >> offset) & ((1 << bits) - 1);
  536. }
  537. """)
  538. opcode("ibitfield_extract", 0, tint,
  539.        [0, 1, 1], [tint, tint, tint], "", """
  540. int base = src0;
  541. int offset = src1.x, bits = src2.x;
  542. if (bits == 0) {
  543.   dst = 0;
  544. } else if (offset < 0 || bits < 0 || offset + bits > 32) {
  545.   dst = 0;
  546. } else {
  547.   dst = (base << (32 - offset - bits)) >> offset; /* use sign-extending shift */
  548. }
  549. """)
  550.  
# Combines the first component of each input to make a 3-component vector.

triop_horiz("vec3", 3, 1, 1, 1, """
dst.x = src0.x;
dst.y = src1.x;
dst.z = src2.x;
""")
  558.  
  559. def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
  560.                  src4_size, const_expr):
  561.    opcode(name, output_size, tunsigned,
  562.           [src1_size, src2_size, src3_size, src4_size],
  563.           [tunsigned, tunsigned, tunsigned, tunsigned],
  564.           "", const_expr)
  565.  
  566. opcode("bitfield_insert", 0, tunsigned, [0, 0, 1, 1],
  567.        [tunsigned, tunsigned, tint, tint], "", """
  568. unsigned base = src0, insert = src1;
  569. int offset = src2.x, bits = src3.x;
  570. if (bits == 0) {
  571.   dst = 0;
  572. } else if (offset < 0 || bits < 0 || bits + offset > 32) {
  573.   dst = 0;
  574. } else {
  575.   unsigned mask = ((1 << bits) - 1) << offset;
  576.   dst = (base & ~mask) | ((insert << bits) & mask);
  577. }
  578. """)
  579.  
# Combines the first component of each input to make a 4-component vector.
quadop_horiz("vec4", 4, 1, 1, 1, 1, """
dst.x = src0.x;
dst.y = src1.x;
dst.z = src2.x;
dst.w = src3.x;
""")
  586.  
  587.  
  588.