Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2015 Intel Corporation
  3.  * Copyright © 2014-2015 Broadcom
  4.  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice (including the next
  14.  * paragraph) shall be included in all copies or substantial portions of the
  15.  * Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  23.  * IN THE SOFTWARE.
  24.  */
  25.  
  26. #include "nir/nir.h"
  27. #include "nir/nir_builder.h"
  28. #include "glsl/list.h"
  29. #include "main/imports.h"
  30. #include "util/ralloc.h"
  31.  
  32. #include "prog_to_nir.h"
  33. #include "prog_instruction.h"
  34. #include "prog_parameter.h"
  35. #include "prog_print.h"
  36.  
  37. /**
  38.  * \file prog_to_nir.c
  39.  *
  40.  * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
  41.  * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
  42.  * vertex processing.  Full GLSL support should use glsl_to_nir instead.
  43.  */
  44.  
  45. struct ptn_compile {
  46.    const struct gl_program *prog;
  47.    nir_builder build;
  48.    bool error;
  49.  
  50.    nir_variable *input_vars[VARYING_SLOT_MAX];
  51.    nir_variable *output_vars[VARYING_SLOT_MAX];
  52.    nir_register **output_regs;
  53.    nir_register **temp_regs;
  54.  
  55.    nir_register *addr_reg;
  56. };
  57.  
  58. #define SWIZ(X, Y, Z, W) \
  59.    (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
  60. #define ptn_swizzle(b, src, x, y, z, w) nir_swizzle(b, src, SWIZ(x, y, z, w), 4, true)
  61. #define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
  62.  
  63. static nir_ssa_def *
  64. ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
  65. {
  66.    nir_builder *b = &c->build;
  67.  
  68.    nir_alu_src src;
  69.    memset(&src, 0, sizeof(src));
  70.  
  71.    if (dest->dest.is_ssa)
  72.       src.src = nir_src_for_ssa(&dest->dest.ssa);
  73.    else {
  74.       assert(!dest->dest.reg.indirect);
  75.       src.src = nir_src_for_reg(dest->dest.reg.reg);
  76.       src.src.reg.base_offset = dest->dest.reg.base_offset;
  77.    }
  78.  
  79.    for (int i = 0; i < 4; i++)
  80.       src.swizzle[i] = i;
  81.  
  82.    return nir_fmov_alu(b, src, 4);
  83. }
  84.  
  85. static nir_alu_dest
  86. ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
  87. {
  88.    nir_alu_dest dest;
  89.  
  90.    memset(&dest, 0, sizeof(dest));
  91.  
  92.    switch (prog_dst->File) {
  93.    case PROGRAM_TEMPORARY:
  94.       dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
  95.       break;
  96.    case PROGRAM_OUTPUT:
  97.       dest.dest.reg.reg = c->output_regs[prog_dst->Index];
  98.       break;
  99.    case PROGRAM_ADDRESS:
  100.       assert(prog_dst->Index == 0);
  101.       dest.dest.reg.reg = c->addr_reg;
  102.       break;
  103.    case PROGRAM_UNDEFINED:
  104.       break;
  105.    }
  106.  
  107.    dest.write_mask = prog_dst->WriteMask;
  108.    dest.saturate = false;
  109.  
  110.    assert(!prog_dst->RelAddr);
  111.  
  112.    return dest;
  113. }
  114.  
  115. /**
  116.  * Multiply the contents of the ADDR register by 4 to convert from the number
  117.  * of vec4s to the number of floating point components.
  118.  */
  119. static nir_ssa_def *
  120. ptn_addr_reg_value(struct ptn_compile *c)
  121. {
  122.    nir_builder *b = &c->build;
  123.    nir_alu_src src;
  124.    memset(&src, 0, sizeof(src));
  125.    src.src = nir_src_for_reg(c->addr_reg);
  126.  
  127.    return nir_imul(b, nir_fmov_alu(b, src, 1), nir_imm_int(b, 4));
  128. }
  129.  
  130. static nir_ssa_def *
  131. ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
  132. {
  133.    nir_builder *b = &c->build;
  134.    nir_alu_src src;
  135.  
  136.    memset(&src, 0, sizeof(src));
  137.  
  138.    switch (prog_src->File) {
  139.    case PROGRAM_UNDEFINED:
  140.       return nir_imm_float(b, 0.0);
  141.    case PROGRAM_TEMPORARY:
  142.       assert(!prog_src->RelAddr && prog_src->Index >= 0);
  143.       src.src.reg.reg = c->temp_regs[prog_src->Index];
  144.       break;
  145.    case PROGRAM_INPUT: {
  146.       /* ARB_vertex_program doesn't allow relative addressing on vertex
  147.        * attributes; ARB_fragment_program has no relative addressing at all.
  148.        */
  149.       assert(!prog_src->RelAddr);
  150.  
  151.       assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
  152.  
  153.       nir_intrinsic_instr *load =
  154.          nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
  155.       load->num_components = 4;
  156.       load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]);
  157.  
  158.       nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
  159.       nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
  160.  
  161.       src.src = nir_src_for_ssa(&load->dest.ssa);
  162.       break;
  163.    }
  164.    case PROGRAM_STATE_VAR:
  165.    case PROGRAM_CONSTANT: {
  166.       /* We actually want to look at the type in the Parameters list for this,
  167.        * because it lets us upload constant builtin uniforms as actual
  168.        * constants.
  169.        */
  170.       struct gl_program_parameter_list *plist = c->prog->Parameters;
  171.       gl_register_file file = prog_src->RelAddr ? prog_src->File :
  172.          plist->Parameters[prog_src->Index].Type;
  173.  
  174.       switch (file) {
  175.       case PROGRAM_CONSTANT:
  176.          if ((c->prog->IndirectRegisterFiles & (1 << PROGRAM_CONSTANT)) == 0) {
  177.             float *v = (float *) plist->ParameterValues[prog_src->Index];
  178.             src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
  179.             break;
  180.          }
  181.          /* FALLTHROUGH */
  182.       case PROGRAM_STATE_VAR: {
  183.          nir_intrinsic_op load_op =
  184.             prog_src->RelAddr ? nir_intrinsic_load_uniform_indirect :
  185.                                 nir_intrinsic_load_uniform;
  186.          nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, load_op);
  187.          nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
  188.          load->num_components = 4;
  189.  
  190.          /* Multiply src->Index by 4 to scale from # of vec4s to components. */
  191.          load->const_index[0] = 4 * prog_src->Index;
  192.          load->const_index[1] = 1;
  193.  
  194.          if (prog_src->RelAddr) {
  195.             nir_ssa_def *reladdr = ptn_addr_reg_value(c);
  196.             if (prog_src->Index < 0) {
  197.                /* This is a negative offset which should be added to the address
  198.                 * register's value.
  199.                 */
  200.                reladdr = nir_iadd(b, reladdr, nir_imm_int(b, load->const_index[0]));
  201.                load->const_index[0] = 0;
  202.             }
  203.             load->src[0] = nir_src_for_ssa(reladdr);
  204.          }
  205.  
  206.          nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
  207.  
  208.          src.src = nir_src_for_ssa(&load->dest.ssa);
  209.          break;
  210.       }
  211.       default:
  212.          fprintf(stderr, "bad uniform src register file: %s (%d)\n",
  213.                  _mesa_register_file_name(file), file);
  214.          abort();
  215.       }
  216.       break;
  217.    }
  218.    default:
  219.       fprintf(stderr, "unknown src register file: %s (%d)\n",
  220.               _mesa_register_file_name(prog_src->File), prog_src->File);
  221.       abort();
  222.    }
  223.  
  224.    nir_ssa_def *def;
  225.    if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
  226.        (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
  227.       /* The simple non-SWZ case. */
  228.       for (int i = 0; i < 4; i++)
  229.          src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
  230.  
  231.       def = nir_fmov_alu(b, src, 4);
  232.  
  233.       if (prog_src->Abs)
  234.          def = nir_fabs(b, def);
  235.  
  236.       if (prog_src->Negate)
  237.          def = nir_fneg(b, def);
  238.    } else {
  239.       /* The SWZ instruction allows per-component zero/one swizzles, and also
  240.        * per-component negation.
  241.        */
  242.       nir_ssa_def *chans[4];
  243.       for (int i = 0; i < 4; i++) {
  244.          int swizzle = GET_SWZ(prog_src->Swizzle, i);
  245.          if (swizzle == SWIZZLE_ZERO) {
  246.             chans[i] = nir_imm_float(b, 0.0);
  247.          } else if (swizzle == SWIZZLE_ONE) {
  248.             chans[i] = nir_imm_float(b, 1.0);
  249.          } else {
  250.             assert(swizzle != SWIZZLE_NIL);
  251.             nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
  252.             nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
  253.             mov->dest.write_mask = 0x1;
  254.             mov->src[0] = src;
  255.             mov->src[0].swizzle[0] = swizzle;
  256.             nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
  257.  
  258.             chans[i] = &mov->dest.dest.ssa;
  259.          }
  260.  
  261.          if (prog_src->Abs)
  262.             chans[i] = nir_fabs(b, chans[i]);
  263.  
  264.          if (prog_src->Negate & (1 << i))
  265.             chans[i] = nir_fneg(b, chans[i]);
  266.       }
  267.       def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
  268.    }
  269.  
  270.    return def;
  271. }
  272.  
  273. static void
  274. ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  275. {
  276.    unsigned num_srcs = nir_op_infos[op].num_inputs;
  277.    nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
  278.    unsigned i;
  279.  
  280.    for (i = 0; i < num_srcs; i++)
  281.       instr->src[i].src = nir_src_for_ssa(src[i]);
  282.  
  283.    instr->dest = dest;
  284.    nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
  285. }
  286.  
  287. static void
  288. ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
  289.                      nir_ssa_def *def, unsigned write_mask)
  290. {
  291.    if (!(dest.write_mask & write_mask))
  292.       return;
  293.  
  294.    nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
  295.    if (!mov)
  296.       return;
  297.  
  298.    mov->dest = dest;
  299.    mov->dest.write_mask &= write_mask;
  300.    mov->src[0].src = nir_src_for_ssa(def);
  301.    for (unsigned i = def->num_components; i < 4; i++)
  302.       mov->src[0].swizzle[i] = def->num_components - 1;
  303.    nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
  304. }
  305.  
  306. static void
  307. ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
  308. {
  309.    ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
  310. }
  311.  
  312. static void
  313. ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  314. {
  315.    ptn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0])));
  316. }
  317.  
  318. /* EXP - Approximate Exponential Base 2
  319.  *  dst.x = 2^{\lfloor src.x\rfloor}
  320.  *  dst.y = src.x - \lfloor src.x\rfloor
  321.  *  dst.z = 2^{src.x}
  322.  *  dst.w = 1.0
  323.  */
  324. static void
  325. ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  326. {
  327.    nir_ssa_def *srcx = ptn_channel(b, src[0], X);
  328.  
  329.    ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
  330.    ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
  331.    ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
  332.    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
  333. }
  334.  
  335. /* LOG - Approximate Logarithm Base 2
  336.  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
  337.  *  dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
  338.  *  dst.z = \log_2{|src.x|}
  339.  *  dst.w = 1.0
  340.  */
  341. static void
  342. ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  343. {
  344.    nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
  345.    nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
  346.    nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
  347.  
  348.    ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
  349.    ptn_move_dest_masked(b, dest,
  350.                         nir_fmul(b, abs_srcx,
  351.                                  nir_fexp2(b, nir_fneg(b, floor_log2))),
  352.                         WRITEMASK_Y);
  353.    ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
  354.    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
  355. }
  356.  
  357. /* DST - Distance Vector
  358.  *   dst.x = 1.0
  359.  *   dst.y = src0.y \times src1.y
  360.  *   dst.z = src0.z
  361.  *   dst.w = src1.w
  362.  */
  363. static void
  364. ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  365. {
  366.    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
  367.    ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
  368.    ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
  369.    ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
  370. }
  371.  
  372. /* LIT - Light Coefficients
  373.  *  dst.x = 1.0
  374.  *  dst.y = max(src.x, 0.0)
  375.  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
  376.  *  dst.w = 1.0
  377.  */
  378. static void
  379. ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  380. {
  381.    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
  382.  
  383.    ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
  384.                                           nir_imm_float(b, 0.0)), WRITEMASK_Y);
  385.  
  386.    if (dest.write_mask & WRITEMASK_Z) {
  387.       nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
  388.       nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
  389.                                                  nir_imm_float(b, 128.0)),
  390.                                      nir_imm_float(b, -128.0));
  391.       nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
  392.                                   wclamp);
  393.  
  394.       nir_ssa_def *z;
  395.       if (b->shader->options->native_integers) {
  396.          z = nir_bcsel(b,
  397.                        nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
  398.                        nir_imm_float(b, 0.0),
  399.                        pow);
  400.       } else {
  401.          z = nir_fcsel(b,
  402.                        nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
  403.                        nir_imm_float(b, 0.0),
  404.                        pow);
  405.       }
  406.  
  407.       ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
  408.    }
  409. }
  410.  
  411. /* SCS - Sine Cosine
  412.  *   dst.x = \cos{src.x}
  413.  *   dst.y = \sin{src.x}
  414.  *   dst.z = 0.0
  415.  *   dst.w = 1.0
  416.  */
  417. static void
  418. ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  419. {
  420.    ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
  421.                         WRITEMASK_X);
  422.    ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
  423.                         WRITEMASK_Y);
  424.    ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
  425.    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
  426. }
  427.  
  428. /**
  429.  * Emit SLT.  For platforms with integers, prefer b2f(flt(...)).
  430.  */
  431. static void
  432. ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  433. {
  434.    if (b->shader->options->native_integers) {
  435.       ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
  436.    } else {
  437.       ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
  438.    }
  439. }
  440.  
  441. /**
  442.  * Emit SGE.  For platforms with integers, prefer b2f(fge(...)).
  443.  */
  444. static void
  445. ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  446. {
  447.    if (b->shader->options->native_integers) {
  448.       ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
  449.    } else {
  450.       ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
  451.    }
  452. }
  453.  
  454. static void
  455. ptn_sle(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  456. {
  457.    nir_ssa_def *commuted[] = { src[1], src[0] };
  458.    ptn_sge(b, dest, commuted);
  459. }
  460.  
  461. static void
  462. ptn_sgt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  463. {
  464.    nir_ssa_def *commuted[] = { src[1], src[0] };
  465.    ptn_slt(b, dest, commuted);
  466. }
  467.  
  468. /**
  469.  * Emit SEQ.  For platforms with integers, prefer b2f(feq(...)).
  470.  */
  471. static void
  472. ptn_seq(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  473. {
  474.    if (b->shader->options->native_integers) {
  475.       ptn_move_dest(b, dest, nir_b2f(b, nir_feq(b, src[0], src[1])));
  476.    } else {
  477.       ptn_move_dest(b, dest, nir_seq(b, src[0], src[1]));
  478.    }
  479. }
  480.  
  481. /**
  482.  * Emit SNE.  For platforms with integers, prefer b2f(fne(...)).
  483.  */
  484. static void
  485. ptn_sne(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  486. {
  487.    if (b->shader->options->native_integers) {
  488.       ptn_move_dest(b, dest, nir_b2f(b, nir_fne(b, src[0], src[1])));
  489.    } else {
  490.       ptn_move_dest(b, dest, nir_sne(b, src[0], src[1]));
  491.    }
  492. }
  493.  
  494. static void
  495. ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  496. {
  497.    ptn_move_dest_masked(b, dest,
  498.                         nir_fsub(b,
  499.                                  nir_fmul(b,
  500.                                           ptn_swizzle(b, src[0], Y, Z, X, X),
  501.                                           ptn_swizzle(b, src[1], Z, X, Y, X)),
  502.                                  nir_fmul(b,
  503.                                           ptn_swizzle(b, src[1], Y, Z, X, X),
  504.                                           ptn_swizzle(b, src[0], Z, X, Y, X))),
  505.                         WRITEMASK_XYZ);
  506.    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
  507. }
  508.  
  509. static void
  510. ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  511. {
  512.    ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
  513. }
  514.  
  515. static void
  516. ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  517. {
  518.    ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
  519. }
  520.  
  521. static void
  522. ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  523. {
  524.    ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
  525. }
  526.  
  527. static void
  528. ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  529. {
  530.    nir_ssa_def *dp3 = nir_fdot3(b, src[0], src[1]);
  531.    ptn_move_dest(b, dest, nir_fadd(b, dp3, ptn_channel(b, src[1], W)));
  532. }
  533.  
  534. static void
  535. ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  536. {
  537.    if (b->shader->options->native_integers) {
  538.       ptn_move_dest(b, dest, nir_bcsel(b,
  539.                                        nir_flt(b, src[0], nir_imm_float(b, 0.0)),
  540.                                        src[1], src[2]));
  541.    } else {
  542.       ptn_move_dest(b, dest, nir_fcsel(b,
  543.                                        nir_slt(b, src[0], nir_imm_float(b, 0.0)),
  544.                                        src[1], src[2]));
  545.    }
  546. }
  547.  
  548. static void
  549. ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  550. {
  551.    ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
  552. }
  553.  
  554. static void
  555. ptn_kil(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
  556. {
  557.    nir_ssa_def *cmp = b->shader->options->native_integers ?
  558.       nir_bany4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0))) :
  559.       nir_fany4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)));
  560.  
  561.    nir_intrinsic_instr *discard =
  562.       nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
  563.    discard->src[0] = nir_src_for_ssa(cmp);
  564.    nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
  565. }
  566.  
  567. static void
  568. ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
  569.         struct prog_instruction *prog_inst)
  570. {
  571.    nir_tex_instr *instr;
  572.    nir_texop op;
  573.    unsigned num_srcs;
  574.  
  575.    switch (prog_inst->Opcode) {
  576.    case OPCODE_TEX:
  577.       op = nir_texop_tex;
  578.       num_srcs = 1;
  579.       break;
  580.    case OPCODE_TXB:
  581.       op = nir_texop_txb;
  582.       num_srcs = 2;
  583.       break;
  584.    case OPCODE_TXD:
  585.       op = nir_texop_txd;
  586.       num_srcs = 3;
  587.       break;
  588.    case OPCODE_TXL:
  589.       op = nir_texop_txl;
  590.       num_srcs = 2;
  591.       break;
  592.    case OPCODE_TXP:
  593.       op = nir_texop_tex;
  594.       num_srcs = 2;
  595.       break;
  596.    case OPCODE_TXP_NV:
  597.       assert(!"not handled");
  598.       op = nir_texop_tex;
  599.       num_srcs = 2;
  600.       break;
  601.    default:
  602.       fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
  603.       abort();
  604.    }
  605.  
  606.    if (prog_inst->TexShadow)
  607.       num_srcs++;
  608.  
  609.    instr = nir_tex_instr_create(b->shader, num_srcs);
  610.    instr->op = op;
  611.    instr->dest_type = nir_type_float;
  612.    instr->is_shadow = prog_inst->TexShadow;
  613.    instr->sampler_index = prog_inst->TexSrcUnit;
  614.  
  615.    switch (prog_inst->TexSrcTarget) {
  616.    case TEXTURE_1D_INDEX:
  617.       instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
  618.       break;
  619.    case TEXTURE_2D_INDEX:
  620.       instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
  621.       break;
  622.    case TEXTURE_3D_INDEX:
  623.       instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
  624.       break;
  625.    case TEXTURE_CUBE_INDEX:
  626.       instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
  627.       break;
  628.    case TEXTURE_RECT_INDEX:
  629.       instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
  630.       break;
  631.    default:
  632.       fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
  633.       abort();
  634.    }
  635.  
  636.    switch (instr->sampler_dim) {
  637.    case GLSL_SAMPLER_DIM_1D:
  638.    case GLSL_SAMPLER_DIM_BUF:
  639.       instr->coord_components = 1;
  640.       break;
  641.    case GLSL_SAMPLER_DIM_2D:
  642.    case GLSL_SAMPLER_DIM_RECT:
  643.    case GLSL_SAMPLER_DIM_EXTERNAL:
  644.    case GLSL_SAMPLER_DIM_MS:
  645.       instr->coord_components = 2;
  646.       break;
  647.    case GLSL_SAMPLER_DIM_3D:
  648.    case GLSL_SAMPLER_DIM_CUBE:
  649.       instr->coord_components = 3;
  650.       break;
  651.    }
  652.  
  653.    unsigned src_number = 0;
  654.  
  655.    instr->src[src_number].src =
  656.       nir_src_for_ssa(ptn_swizzle(b, src[0], X, Y, Z, W));
  657.    instr->src[src_number].src_type = nir_tex_src_coord;
  658.    src_number++;
  659.  
  660.    if (prog_inst->Opcode == OPCODE_TXP) {
  661.       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
  662.       instr->src[src_number].src_type = nir_tex_src_projector;
  663.       src_number++;
  664.    }
  665.  
  666.    if (prog_inst->Opcode == OPCODE_TXB) {
  667.       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
  668.       instr->src[src_number].src_type = nir_tex_src_bias;
  669.       src_number++;
  670.    }
  671.  
  672.    if (prog_inst->Opcode == OPCODE_TXL) {
  673.       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
  674.       instr->src[src_number].src_type = nir_tex_src_lod;
  675.       src_number++;
  676.    }
  677.  
  678.    if (instr->is_shadow) {
  679.       if (instr->coord_components < 3)
  680.          instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
  681.       else
  682.          instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
  683.  
  684.       instr->src[src_number].src_type = nir_tex_src_comparitor;
  685.       src_number++;
  686.    }
  687.  
  688.    assert(src_number == num_srcs);
  689.  
  690.    nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
  691.    nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
  692.  
  693.    /* Resolve the writemask on the texture op. */
  694.    ptn_move_dest(b, dest, &instr->dest.ssa);
  695. }
  696.  
  697. static const nir_op op_trans[MAX_OPCODE] = {
  698.    [OPCODE_NOP] = 0,
  699.    [OPCODE_ABS] = nir_op_fabs,
  700.    [OPCODE_ADD] = nir_op_fadd,
  701.    [OPCODE_ARL] = 0,
  702.    [OPCODE_CMP] = 0,
  703.    [OPCODE_COS] = nir_op_fcos,
  704.    [OPCODE_DDX] = nir_op_fddx,
  705.    [OPCODE_DDY] = nir_op_fddy,
  706.    [OPCODE_DP2] = 0,
  707.    [OPCODE_DP3] = 0,
  708.    [OPCODE_DP4] = 0,
  709.    [OPCODE_DPH] = 0,
  710.    [OPCODE_DST] = 0,
  711.    [OPCODE_END] = 0,
  712.    [OPCODE_EX2] = nir_op_fexp2,
  713.    [OPCODE_EXP] = 0,
  714.    [OPCODE_FLR] = nir_op_ffloor,
  715.    [OPCODE_FRC] = nir_op_ffract,
  716.    [OPCODE_LG2] = nir_op_flog2,
  717.    [OPCODE_LIT] = 0,
  718.    [OPCODE_LOG] = 0,
  719.    [OPCODE_LRP] = 0,
  720.    [OPCODE_MAD] = nir_op_ffma,
  721.    [OPCODE_MAX] = nir_op_fmax,
  722.    [OPCODE_MIN] = nir_op_fmin,
  723.    [OPCODE_MOV] = nir_op_fmov,
  724.    [OPCODE_MUL] = nir_op_fmul,
  725.    [OPCODE_POW] = nir_op_fpow,
  726.    [OPCODE_RCP] = nir_op_frcp,
  727.  
  728.    [OPCODE_RSQ] = nir_op_frsq,
  729.    [OPCODE_SCS] = 0,
  730.    [OPCODE_SEQ] = 0,
  731.    [OPCODE_SGE] = 0,
  732.    [OPCODE_SGT] = 0,
  733.    [OPCODE_SIN] = nir_op_fsin,
  734.    [OPCODE_SLE] = 0,
  735.    [OPCODE_SLT] = 0,
  736.    [OPCODE_SNE] = 0,
  737.    [OPCODE_SSG] = nir_op_fsign,
  738.    [OPCODE_SUB] = nir_op_fsub,
  739.    [OPCODE_SWZ] = 0,
  740.    [OPCODE_TEX] = 0,
  741.    [OPCODE_TRUNC] = nir_op_ftrunc,
  742.    [OPCODE_TXB] = 0,
  743.    [OPCODE_TXD] = 0,
  744.    [OPCODE_TXL] = 0,
  745.    [OPCODE_TXP] = 0,
  746.    [OPCODE_TXP_NV] = 0,
  747.    [OPCODE_XPD] = 0,
  748. };
  749.  
  750. static void
  751. ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
  752. {
  753.    nir_builder *b = &c->build;
  754.    unsigned i;
  755.    const unsigned op = prog_inst->Opcode;
  756.  
  757.    if (op == OPCODE_END)
  758.       return;
  759.  
  760.    nir_ssa_def *src[3];
  761.    for (i = 0; i < 3; i++) {
  762.       src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
  763.    }
  764.    nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
  765.    if (c->error)
  766.       return;
  767.  
  768.    switch (op) {
  769.    case OPCODE_RSQ:
  770.       ptn_move_dest(b, dest, nir_frsq(b, ptn_channel(b, src[0], X)));
  771.       break;
  772.  
  773.    case OPCODE_RCP:
  774.       ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
  775.       break;
  776.  
  777.    case OPCODE_EX2:
  778.       ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
  779.       break;
  780.  
  781.    case OPCODE_LG2:
  782.       ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
  783.       break;
  784.  
  785.    case OPCODE_POW:
  786.       ptn_move_dest(b, dest, nir_fpow(b,
  787.                                       ptn_channel(b, src[0], X),
  788.                                       ptn_channel(b, src[1], X)));
  789.       break;
  790.  
  791.    case OPCODE_COS:
  792.       ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
  793.       break;
  794.  
  795.    case OPCODE_SIN:
  796.       ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
  797.       break;
  798.  
  799.    case OPCODE_ARL:
  800.       ptn_arl(b, dest, src);
  801.       break;
  802.  
  803.    case OPCODE_EXP:
  804.       ptn_exp(b, dest, src);
  805.       break;
  806.  
  807.    case OPCODE_LOG:
  808.       ptn_log(b, dest, src);
  809.       break;
  810.  
  811.    case OPCODE_LRP:
  812.       ptn_lrp(b, dest, src);
  813.       break;
  814.  
  815.    case OPCODE_DST:
  816.       ptn_dst(b, dest, src);
  817.       break;
  818.  
  819.    case OPCODE_LIT:
  820.       ptn_lit(b, dest, src);
  821.       break;
  822.  
  823.    case OPCODE_XPD:
  824.       ptn_xpd(b, dest, src);
  825.       break;
  826.  
  827.    case OPCODE_DP2:
  828.       ptn_dp2(b, dest, src);
  829.       break;
  830.  
  831.    case OPCODE_DP3:
  832.       ptn_dp3(b, dest, src);
  833.       break;
  834.  
  835.    case OPCODE_DP4:
  836.       ptn_dp4(b, dest, src);
  837.       break;
  838.  
  839.    case OPCODE_DPH:
  840.       ptn_dph(b, dest, src);
  841.       break;
  842.  
  843.    case OPCODE_KIL:
  844.       ptn_kil(b, dest, src);
  845.       break;
  846.  
  847.    case OPCODE_CMP:
  848.       ptn_cmp(b, dest, src);
  849.       break;
  850.  
  851.    case OPCODE_SCS:
  852.       ptn_scs(b, dest, src);
  853.       break;
  854.  
  855.    case OPCODE_SLT:
  856.       ptn_slt(b, dest, src);
  857.       break;
  858.  
  859.    case OPCODE_SGT:
  860.       ptn_sgt(b, dest, src);
  861.       break;
  862.  
  863.    case OPCODE_SLE:
  864.       ptn_sle(b, dest, src);
  865.       break;
  866.  
  867.    case OPCODE_SGE:
  868.       ptn_sge(b, dest, src);
  869.       break;
  870.  
  871.    case OPCODE_SEQ:
  872.       ptn_seq(b, dest, src);
  873.       break;
  874.  
  875.    case OPCODE_SNE:
  876.       ptn_sne(b, dest, src);
  877.       break;
  878.  
  879.    case OPCODE_TEX:
  880.    case OPCODE_TXB:
  881.    case OPCODE_TXD:
  882.    case OPCODE_TXL:
  883.    case OPCODE_TXP:
  884.    case OPCODE_TXP_NV:
  885.       ptn_tex(b, dest, src, prog_inst);
  886.       break;
  887.  
  888.    case OPCODE_SWZ:
  889.       /* Extended swizzles were already handled in ptn_get_src(). */
  890.       ptn_alu(b, nir_op_fmov, dest, src);
  891.       break;
  892.  
  893.    case OPCODE_NOP:
  894.       break;
  895.  
  896.    default:
  897.       if (op_trans[op] != 0 || op == OPCODE_MOV) {
  898.          ptn_alu(b, op_trans[op], dest, src);
  899.       } else {
  900.          fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
  901.          abort();
  902.       }
  903.       break;
  904.    }
  905.  
  906.    if (prog_inst->SaturateMode) {
  907.       assert(prog_inst->SaturateMode == SATURATE_ZERO_ONE);
  908.       assert(!dest.dest.is_ssa);
  909.       ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
  910.    }
  911. }
  912.  
  913. /**
  914.  * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
  915.  * variables at the end of the shader.
  916.  *
  917.  * We don't generate these incrementally as the PROGRAM_OUTPUT values are
  918.  * written, because there's no output load intrinsic, which means we couldn't
  919.  * handle writemasks.
  920.  */
  921. static void
  922. ptn_add_output_stores(struct ptn_compile *c)
  923. {
  924.    nir_builder *b = &c->build;
  925.  
  926.    foreach_list_typed(nir_variable, var, node, &b->shader->outputs) {
  927.       nir_intrinsic_instr *store =
  928.          nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
  929.       store->num_components = 4;
  930.       store->variables[0] =
  931.          nir_deref_var_create(store, c->output_vars[var->data.location]);
  932.       store->src[0].reg.reg = c->output_regs[var->data.location];
  933.       nir_instr_insert_after_cf_list(c->build.cf_node_list, &store->instr);
  934.    }
  935. }
  936.  
  937. static void
  938. setup_registers_and_variables(struct ptn_compile *c)
  939. {
  940.    nir_builder *b = &c->build;
  941.    struct nir_shader *shader = b->shader;
  942.  
  943.    /* Create input variables. */
  944.    const int num_inputs = _mesa_flsll(c->prog->InputsRead);
  945.    for (int i = 0; i < num_inputs; i++) {
  946.       if (!(c->prog->InputsRead & BITFIELD64_BIT(i)))
  947.          continue;
  948.       nir_variable *var = rzalloc(shader, nir_variable);
  949.       var->type = glsl_vec4_type();
  950.       var->data.read_only = true;
  951.       var->data.mode = nir_var_shader_in;
  952.       var->name = ralloc_asprintf(var, "in_%d", i);
  953.       var->data.location = i;
  954.       var->data.index = 0;
  955.  
  956.       if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
  957.          struct gl_fragment_program *fp =
  958.             (struct gl_fragment_program *) c->prog;
  959.  
  960.          var->data.interpolation = fp->InterpQualifier[i];
  961.  
  962.          if (i == VARYING_SLOT_POS) {
  963.             var->data.origin_upper_left = fp->OriginUpperLeft;
  964.             var->data.pixel_center_integer = fp->PixelCenterInteger;
  965.          } else if (i == VARYING_SLOT_FOGC) {
  966.             /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
  967.              * input variable a float, and create a local containing the
  968.              * full vec4 value.
  969.              */
  970.             var->type = glsl_float_type();
  971.  
  972.             nir_intrinsic_instr *load_x =
  973.                nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
  974.             load_x->num_components = 1;
  975.             load_x->variables[0] = nir_deref_var_create(load_x, var);
  976.             nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, NULL);
  977.             nir_instr_insert_after_cf_list(b->cf_node_list, &load_x->instr);
  978.  
  979.             nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
  980.                                          nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
  981.  
  982.             nir_variable *fullvar = rzalloc(shader, nir_variable);
  983.             fullvar->type = glsl_vec4_type();
  984.             fullvar->data.mode = nir_var_local;
  985.             fullvar->name = "fogcoord_tmp";
  986.             exec_list_push_tail(&b->impl->locals, &fullvar->node);
  987.  
  988.             nir_intrinsic_instr *store =
  989.                nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
  990.             store->num_components = 4;
  991.             store->variables[0] = nir_deref_var_create(store, fullvar);
  992.             store->src[0] = nir_src_for_ssa(f001);
  993.             nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
  994.  
  995.             /* Insert the real input into the list so the driver has real
  996.              * inputs, but set c->input_vars[i] to the temporary so we use
  997.              * the splatted value.
  998.              */
  999.             exec_list_push_tail(&shader->inputs, &var->node);
  1000.             c->input_vars[i] = fullvar;
  1001.             continue;
  1002.          }
  1003.       }
  1004.  
  1005.       exec_list_push_tail(&shader->inputs, &var->node);
  1006.       c->input_vars[i] = var;
  1007.    }
  1008.  
  1009.    /* Create output registers and variables. */
  1010.    int max_outputs = _mesa_fls(c->prog->OutputsWritten);
  1011.    c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
  1012.  
  1013.    for (int i = 0; i < max_outputs; i++) {
  1014.       if (!(c->prog->OutputsWritten & BITFIELD64_BIT(i)))
  1015.          continue;
  1016.  
  1017.       /* Since we can't load from outputs in the IR, we make temporaries
  1018.        * for the outputs and emit stores to the real outputs at the end of
  1019.        * the shader.
  1020.        */
  1021.       nir_register *reg = nir_local_reg_create(b->impl);
  1022.       reg->num_components = 4;
  1023.  
  1024.       nir_variable *var = rzalloc(shader, nir_variable);
  1025.       var->type = glsl_vec4_type();
  1026.       var->data.mode = nir_var_shader_out;
  1027.       var->name = ralloc_asprintf(var, "out_%d", i);
  1028.  
  1029.       var->data.location = i;
  1030.       var->data.index = 0;
  1031.  
  1032.       c->output_regs[i] = reg;
  1033.  
  1034.       exec_list_push_tail(&shader->outputs, &var->node);
  1035.       c->output_vars[i] = var;
  1036.    }
  1037.  
  1038.    /* Create temporary registers. */
  1039.    c->temp_regs = rzalloc_array(c, nir_register *, c->prog->NumTemporaries);
  1040.  
  1041.    nir_register *reg;
  1042.    for (int i = 0; i < c->prog->NumTemporaries; i++) {
  1043.       reg = nir_local_reg_create(b->impl);
  1044.       if (!reg) {
  1045.          c->error = true;
  1046.          return;
  1047.       }
  1048.       reg->num_components = 4;
  1049.       c->temp_regs[i] = reg;
  1050.    }
  1051.  
  1052.    /* Create the address register (for ARB_vertex_program). */
  1053.    reg = nir_local_reg_create(b->impl);
  1054.    if (!reg) {
  1055.       c->error = true;
  1056.       return;
  1057.    }
  1058.    reg->num_components = 1;
  1059.    c->addr_reg = reg;
  1060.  
  1061.    /* Set the number of uniforms */
  1062.    shader->num_uniforms = 4 * c->prog->Parameters->NumParameters;
  1063. }
  1064.  
  1065. struct nir_shader *
  1066. prog_to_nir(const struct gl_program *prog, const nir_shader_compiler_options *options)
  1067. {
  1068.    struct ptn_compile *c;
  1069.    struct nir_shader *s;
  1070.  
  1071.    c = rzalloc(NULL, struct ptn_compile);
  1072.    if (!c)
  1073.       return NULL;
  1074.    s = nir_shader_create(NULL, options);
  1075.    if (!s)
  1076.       goto fail;
  1077.    c->prog = prog;
  1078.  
  1079.    nir_function *func = nir_function_create(s, "main");
  1080.    nir_function_overload *overload = nir_function_overload_create(func);
  1081.    nir_function_impl *impl = nir_function_impl_create(overload);
  1082.  
  1083.    c->build.shader = s;
  1084.    c->build.impl = impl;
  1085.    c->build.cf_node_list = &impl->body;
  1086.  
  1087.    setup_registers_and_variables(c);
  1088.    if (unlikely(c->error))
  1089.       goto fail;
  1090.  
  1091.    for (unsigned int i = 0; i < prog->NumInstructions; i++) {
  1092.       ptn_emit_instruction(c, &prog->Instructions[i]);
  1093.  
  1094.       if (unlikely(c->error))
  1095.          break;
  1096.    }
  1097.  
  1098.    ptn_add_output_stores(c);
  1099.  
  1100. fail:
  1101.    if (c->error) {
  1102.       ralloc_free(s);
  1103.       s = NULL;
  1104.    }
  1105.    ralloc_free(c);
  1106.    return s;
  1107. }
  1108.