Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2012 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. /** @file brw_eu_compact.c
  25.  *
  26.  * Instruction compaction is a feature of G45 and newer hardware that allows
  27.  * for a smaller instruction encoding.
  28.  *
  29.  * The instruction cache is on the order of 32KB, and many programs generate
  30.  * far more instructions than that.  The instruction cache is built to barely
  31.  * keep up with instruction dispatch ability in cache hit cases -- L1
  32.  * instruction cache misses that still hit in the next level could limit
  33.  * throughput by around 50%.
  34.  *
  35.  * The idea of instruction compaction is that most instructions use a tiny
  36.  * subset of the GPU functionality, so we can encode what would be a 16 byte
  37.  * instruction in 8 bytes using some lookup tables for various fields.
  38.  *
  39.  *
  40.  * Instruction compaction capabilities vary subtly by generation.
  41.  *
  42.  * G45's support for instruction compaction is very limited. Jump counts on
  43.  * this generation are in units of 16-byte uncompacted instructions. As such,
  44.  * all jump targets must be 16-byte aligned. Also, all instructions must be
  45.  * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
  46.  * A G45-only instruction, NENOP, must be used to provide padding to align
  47.  * uncompacted instructions.
  48.  *
  49.  * Gen5 removes these restrictions and changes jump counts to be in units of
  50.  * 8-byte compacted instructions, allowing jump targets to be only 8-byte
  51.  * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
  52.  *
  53.  * Gen6 adds the ability to compact instructions with a limited range of
  54.  * immediate values. Compactable immediates have 12 unrestricted bits, and a
  55.  * 13th bit that's replicated through the high 20 bits, to create the 32-bit
  56.  * value of DW3 in the uncompacted instruction word.
  57.  *
  58.  * On Gen7 we can compact some control flow instructions with a small positive
  59.  * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
  60.  * control flow instructions with UIP cannot be compacted, because of the
  61.  * replicated 13th bit. No control flow instructions can be compacted on Gen6
  62.  * since the jump count field is not in DW3.
  63.  *
  64.  *    break    JIP/UIP
  65.  *    cont     JIP/UIP
  66.  *    halt     JIP/UIP
  67.  *    if       JIP/UIP
  68.  *    else     JIP (plus UIP on BDW+)
  69.  *    endif    JIP
  70.  *    while    JIP (must be negative)
  71.  *
  72.  * Gen 8 adds support for compacting 3-src instructions.
  73.  */
  74.  
  75. #include "brw_context.h"
  76. #include "brw_eu.h"
  77. #include "intel_asm_annotation.h"
  78. #include "util/u_atomic.h" /* for p_atomic_cmpxchg */
  79.  
  /* Control-index lookup table carrying the g45_ prefix: 32 bit patterns,
   * each written out as 17 binary digits.  set_control_index() searches the
   * table that control_index_table points at and stores the position (0-31)
   * of the matching pattern in the compacted instruction.
   * NOTE(review): the code that points control_index_table at this array is
   * not in this part of the file; presumably it is picked by hardware
   * generation -- confirm.
   * The 0b literals are a compiler extension before C23.
   */
  80. static const uint32_t g45_control_index_table[32] = {
  81.    0b00000000000000000,
  82.    0b01000000000000000,
  83.    0b00110000000000000,
  84.    0b00000000000000010,
  85.    0b00100000000000000,
  86.    0b00010000000000000,
  87.    0b01000000000100000,
  88.    0b01000000100000000,
  89.    0b01010000000100000,
  90.    0b00000000100000010,
  91.    0b11000000000000000,
  92.    0b00001000100000010,
  93.    0b01001000100000000,
  94.    0b00000000100000000,
  95.    0b11000000000100000,
  96.    0b00001000100000000,
  97.    0b10110000000000000,
  98.    0b11010000000100000,
  99.    0b00110000100000000,
  100.    0b00100000100000000,
  101.    0b01000000000001000,
  102.    0b01000000000000100,
  103.    0b00111100000000000,
  104.    0b00101011000000000,
  105.    0b00110000000010000,
  106.    0b00010000100000000,
  107.    0b01000000000100100,
  108.    0b01000000000101000,
  109.    0b00110000000000110,
  110.    0b00000000000001010,
  111.    0b01010000000101000,
  112.    0b01010000000100100
  113. };
  114.  
  /* Datatype-index lookup table carrying the g45_ prefix: 32 bit patterns,
   * each written out as 18 binary digits.  set_datatype_index() searches the
   * table that datatype_table points at and stores the position (0-31) of
   * the matching pattern in the compacted instruction.
   * NOTE(review): the code that points datatype_table at this array is not
   * in this part of the file -- confirm how it is selected.
   */
  115. static const uint32_t g45_datatype_table[32] = {
  116.    0b001000000000100001,
  117.    0b001011010110101101,
  118.    0b001000001000110001,
  119.    0b001111011110111101,
  120.    0b001011010110101100,
  121.    0b001000000110101101,
  122.    0b001000000000100000,
  123.    0b010100010110110001,
  124.    0b001100011000101101,
  125.    0b001000000000100010,
  126.    0b001000001000110110,
  127.    0b010000001000110001,
  128.    0b001000001000110010,
  129.    0b011000001000110010,
  130.    0b001111011110111100,
  131.    0b001000000100101000,
  132.    0b010100011000110001,
  133.    0b001010010100101001,
  134.    0b001000001000101001,
  135.    0b010000001000110110,
  136.    0b101000001000110001,
  137.    0b001011011000101101,
  138.    0b001000000100001001,
  139.    0b001011011000101100,
  140.    0b110100011000110001,
  141.    0b001000001110111101,
  142.    0b110000001000110001,
  143.    0b011000000100101010,
  144.    0b101000001000101001,
  145.    0b001011010110001100,
  146.    0b001000000110100001,
  147.    0b001010010100001000
  148. };
  149.  
  /* Subregister-index lookup table carrying the g45_ prefix: 32 bit
   * patterns, each written out as 15 binary digits.  set_subreg_index()
   * searches the table that subreg_table points at and stores the position
   * (0-31) of the matching pattern in the compacted instruction.
   * NOTE(review): the code that points subreg_table at this array is not in
   * this part of the file -- confirm how it is selected.
   */
  150. static const uint16_t g45_subreg_table[32] = {
  151.    0b000000000000000,
  152.    0b000000010000000,
  153.    0b000001000000000,
  154.    0b000100000000000,
  155.    0b000000000100000,
  156.    0b100000000000000,
  157.    0b000000000010000,
  158.    0b001100000000000,
  159.    0b001010000000000,
  160.    0b000000100000000,
  161.    0b001000000000000,
  162.    0b000000000001000,
  163.    0b000000001000000,
  164.    0b000000000000001,
  165.    0b000010000000000,
  166.    0b000000010100000,
  167.    0b000000000000111,
  168.    0b000001000100000,
  169.    0b011000000000000,
  170.    0b000000110000000,
  171.    0b000000000000010,
  172.    0b000000000000100,
  173.    0b000000001100000,
  174.    0b000100000000010,
  175.    0b001110011000110,
  176.    0b001110100001000,
  177.    0b000110011000110,
  178.    0b000001000011000,
  179.    0b000110010000100,
  180.    0b001100000000110,
  181.    0b000000010000110,
  182.    0b000001000110000
  183. };
  184.  
  /* Source-index lookup table carrying the g45_ prefix: 32 bit patterns,
   * each written out as 12 binary digits.  get_src_index() searches the
   * table that src_index_table points at and reports the position (0-31) of
   * the matching pattern.
   * NOTE(review): the code that points src_index_table at this array is not
   * in this part of the file -- confirm how it is selected.
   */
  185. static const uint16_t g45_src_index_table[32] = {
  186.    0b000000000000,
  187.    0b010001101000,
  188.    0b010110001000,
  189.    0b011010010000,
  190.    0b001101001000,
  191.    0b010110001010,
  192.    0b010101110000,
  193.    0b011001111000,
  194.    0b001000101000,
  195.    0b000000101000,
  196.    0b010001010000,
  197.    0b111101101100,
  198.    0b010110001100,
  199.    0b010001101100,
  200.    0b011010010100,
  201.    0b010001001100,
  202.    0b001100101000,
  203.    0b000000000010,
  204.    0b111101001100,
  205.    0b011001101000,
  206.    0b010101001000,
  207.    0b000000000100,
  208.    0b000000101100,
  209.    0b010001101010,
  210.    0b000000111000,
  211.    0b010101011000,
  212.    0b000100100000,
  213.    0b010110000000,
  214.    0b010000000100,
  215.    0b010000111000,
  216.    0b000101100000,
  217.    0b111101110100
  218. };
  219.  
  /* Control-index lookup table carrying the gen6_ prefix: 32 bit patterns,
   * each written out as 17 binary digits.  set_control_index() searches the
   * table that control_index_table points at and stores the position (0-31)
   * of the matching pattern in the compacted instruction.
   * NOTE(review): the code that points control_index_table at this array is
   * not in this part of the file -- confirm how it is selected.
   */
  220. static const uint32_t gen6_control_index_table[32] = {
  221.    0b00000000000000000,
  222.    0b01000000000000000,
  223.    0b00110000000000000,
  224.    0b00000000100000000,
  225.    0b00010000000000000,
  226.    0b00001000100000000,
  227.    0b00000000100000010,
  228.    0b00000000000000010,
  229.    0b01000000100000000,
  230.    0b01010000000000000,
  231.    0b10110000000000000,
  232.    0b00100000000000000,
  233.    0b11010000000000000,
  234.    0b11000000000000000,
  235.    0b01001000100000000,
  236.    0b01000000000001000,
  237.    0b01000000000000100,
  238.    0b00000000000001000,
  239.    0b00000000000000100,
  240.    0b00111000100000000,
  241.    0b00001000100000010,
  242.    0b00110000100000000,
  243.    0b00110000000000001,
  244.    0b00100000000000001,
  245.    0b00110000000000010,
  246.    0b00110000000000101,
  247.    0b00110000000001001,
  248.    0b00110000000010000,
  249.    0b00110000000000011,
  250.    0b00110000000000100,
  251.    0b00110000100001000,
  252.    0b00100000000001001
  253. };
  254.  
  /* Datatype-index lookup table carrying the gen6_ prefix: 32 bit patterns,
   * each written out as 18 binary digits.  set_datatype_index() searches the
   * table that datatype_table points at and stores the position (0-31) of
   * the matching pattern in the compacted instruction.
   * NOTE(review): the code that points datatype_table at this array is not
   * in this part of the file -- confirm how it is selected.
   */
  255. static const uint32_t gen6_datatype_table[32] = {
  256.    0b001001110000000000,
  257.    0b001000110000100000,
  258.    0b001001110000000001,
  259.    0b001000000001100000,
  260.    0b001010110100101001,
  261.    0b001000000110101101,
  262.    0b001100011000101100,
  263.    0b001011110110101101,
  264.    0b001000000111101100,
  265.    0b001000000001100001,
  266.    0b001000110010100101,
  267.    0b001000000001000001,
  268.    0b001000001000110001,
  269.    0b001000001000101001,
  270.    0b001000000000100000,
  271.    0b001000001000110010,
  272.    0b001010010100101001,
  273.    0b001011010010100101,
  274.    0b001000000110100101,
  275.    0b001100011000101001,
  276.    0b001011011000101100,
  277.    0b001011010110100101,
  278.    0b001011110110100101,
  279.    0b001111011110111101,
  280.    0b001111011110111100,
  281.    0b001111011110111101,
  282.    0b001111011110011101,
  283.    0b001111011110111110,
  284.    0b001000000000100001,
  285.    0b001000000000100010,
  286.    0b001001111111011101,
  287.    0b001000001110111110,
  288. };
  289.  
  /* Subregister-index lookup table carrying the gen6_ prefix: 32 bit
   * patterns, each written out as 15 binary digits.  set_subreg_index()
   * searches the table that subreg_table points at and stores the position
   * (0-31) of the matching pattern in the compacted instruction.
   * NOTE(review): the code that points subreg_table at this array is not in
   * this part of the file -- confirm how it is selected.
   */
  290. static const uint16_t gen6_subreg_table[32] = {
  291.    0b000000000000000,
  292.    0b000000000000100,
  293.    0b000000110000000,
  294.    0b111000000000000,
  295.    0b011110000001000,
  296.    0b000010000000000,
  297.    0b000000000010000,
  298.    0b000110000001100,
  299.    0b001000000000000,
  300.    0b000001000000000,
  301.    0b000001010010100,
  302.    0b000000001010110,
  303.    0b010000000000000,
  304.    0b110000000000000,
  305.    0b000100000000000,
  306.    0b000000010000000,
  307.    0b000000000001000,
  308.    0b100000000000000,
  309.    0b000001010000000,
  310.    0b001010000000000,
  311.    0b001100000000000,
  312.    0b000000001010100,
  313.    0b101101010010100,
  314.    0b010100000000000,
  315.    0b000000010001111,
  316.    0b011000000000000,
  317.    0b111110000000000,
  318.    0b101000000000000,
  319.    0b000000000001111,
  320.    0b000100010001111,
  321.    0b001000010001111,
  322.    0b000110000000000,
  323. };
  324.  
  /* Source-index lookup table carrying the gen6_ prefix: 32 bit patterns,
   * each written out as 12 binary digits.  get_src_index() searches the
   * table that src_index_table points at and reports the position (0-31) of
   * the matching pattern.
   * NOTE(review): the code that points src_index_table at this array is not
   * in this part of the file -- confirm how it is selected.
   */
  325. static const uint16_t gen6_src_index_table[32] = {
  326.    0b000000000000,
  327.    0b010110001000,
  328.    0b010001101000,
  329.    0b001000101000,
  330.    0b011010010000,
  331.    0b000100100000,
  332.    0b010001101100,
  333.    0b010101110000,
  334.    0b011001111000,
  335.    0b001100101000,
  336.    0b010110001100,
  337.    0b001000100000,
  338.    0b010110001010,
  339.    0b000000000010,
  340.    0b010101010000,
  341.    0b010101101000,
  342.    0b111101001100,
  343.    0b111100101100,
  344.    0b011001110000,
  345.    0b010110001001,
  346.    0b010101011000,
  347.    0b001101001000,
  348.    0b010000101100,
  349.    0b010000000000,
  350.    0b001101110000,
  351.    0b001100010000,
  352.    0b001100000000,
  353.    0b010001101010,
  354.    0b001101111000,
  355.    0b000001110000,
  356.    0b001100100000,
  357.    0b001101010000,
  358. };
  359.  
  /* Control-index lookup table carrying the gen7_ prefix: 32 bit patterns,
   * each written out as 19 binary digits.  set_control_index() searches the
   * table that control_index_table points at and stores the position (0-31)
   * of the matching pattern in the compacted instruction.
   * NOTE(review): the code that points control_index_table at this array is
   * not in this part of the file -- confirm how it is selected.
   */
  360. static const uint32_t gen7_control_index_table[32] = {
  361.    0b0000000000000000010,
  362.    0b0000100000000000000,
  363.    0b0000100000000000001,
  364.    0b0000100000000000010,
  365.    0b0000100000000000011,
  366.    0b0000100000000000100,
  367.    0b0000100000000000101,
  368.    0b0000100000000000111,
  369.    0b0000100000000001000,
  370.    0b0000100000000001001,
  371.    0b0000100000000001101,
  372.    0b0000110000000000000,
  373.    0b0000110000000000001,
  374.    0b0000110000000000010,
  375.    0b0000110000000000011,
  376.    0b0000110000000000100,
  377.    0b0000110000000000101,
  378.    0b0000110000000000111,
  379.    0b0000110000000001001,
  380.    0b0000110000000001101,
  381.    0b0000110000000010000,
  382.    0b0000110000100000000,
  383.    0b0001000000000000000,
  384.    0b0001000000000000010,
  385.    0b0001000000000000100,
  386.    0b0001000000100000000,
  387.    0b0010110000000000000,
  388.    0b0010110000000010000,
  389.    0b0011000000000000000,
  390.    0b0011000000100000000,
  391.    0b0101000000000000000,
  392.    0b0101000000100000000
  393. };
  394.  
  /* Datatype-index lookup table carrying the gen7_ prefix: 32 bit patterns,
   * each written out as 18 binary digits.  set_datatype_index() searches the
   * table that datatype_table points at and stores the position (0-31) of
   * the matching pattern in the compacted instruction.
   * NOTE(review): the code that points datatype_table at this array is not
   * in this part of the file -- confirm how it is selected.
   */
  395. static const uint32_t gen7_datatype_table[32] = {
  396.    0b001000000000000001,
  397.    0b001000000000100000,
  398.    0b001000000000100001,
  399.    0b001000000001100001,
  400.    0b001000000010111101,
  401.    0b001000001011111101,
  402.    0b001000001110100001,
  403.    0b001000001110100101,
  404.    0b001000001110111101,
  405.    0b001000010000100001,
  406.    0b001000110000100000,
  407.    0b001000110000100001,
  408.    0b001001010010100101,
  409.    0b001001110010100100,
  410.    0b001001110010100101,
  411.    0b001111001110111101,
  412.    0b001111011110011101,
  413.    0b001111011110111100,
  414.    0b001111011110111101,
  415.    0b001111111110111100,
  416.    0b000000001000001100,
  417.    0b001000000000111101,
  418.    0b001000000010100101,
  419.    0b001000010000100000,
  420.    0b001001010010100100,
  421.    0b001001110010000100,
  422.    0b001010010100001001,
  423.    0b001101111110111101,
  424.    0b001111111110111101,
  425.    0b001011110110101100,
  426.    0b001010010100101000,
  427.    0b001010110100101000
  428. };
  429.  
  /* Subregister-index lookup table carrying the gen7_ prefix: 32 bit
   * patterns, each written out as 15 binary digits.  set_subreg_index()
   * searches the table that subreg_table points at and stores the position
   * (0-31) of the matching pattern in the compacted instruction.
   * NOTE(review): the code that points subreg_table at this array is not in
   * this part of the file -- confirm how it is selected.
   */
  430. static const uint16_t gen7_subreg_table[32] = {
  431.    0b000000000000000,
  432.    0b000000000000001,
  433.    0b000000000001000,
  434.    0b000000000001111,
  435.    0b000000000010000,
  436.    0b000000010000000,
  437.    0b000000100000000,
  438.    0b000000110000000,
  439.    0b000001000000000,
  440.    0b000001000010000,
  441.    0b000010100000000,
  442.    0b001000000000000,
  443.    0b001000000000001,
  444.    0b001000010000001,
  445.    0b001000010000010,
  446.    0b001000010000011,
  447.    0b001000010000100,
  448.    0b001000010000111,
  449.    0b001000010001000,
  450.    0b001000010001110,
  451.    0b001000010001111,
  452.    0b001000110000000,
  453.    0b001000111101000,
  454.    0b010000000000000,
  455.    0b010000110000000,
  456.    0b011000000000000,
  457.    0b011110010000111,
  458.    0b100000000000000,
  459.    0b101000000000000,
  460.    0b110000000000000,
  461.    0b111000000000000,
  462.    0b111000000011100
  463. };
  464.  
  /* Source-index lookup table carrying the gen7_ prefix: 32 bit patterns,
   * each written out as 12 binary digits.  get_src_index() searches the
   * table that src_index_table points at and reports the position (0-31) of
   * the matching pattern.
   * NOTE(review): the code that points src_index_table at this array is not
   * in this part of the file -- confirm how it is selected.
   */
  465. static const uint16_t gen7_src_index_table[32] = {
  466.    0b000000000000,
  467.    0b000000000010,
  468.    0b000000010000,
  469.    0b000000010010,
  470.    0b000000011000,
  471.    0b000000100000,
  472.    0b000000101000,
  473.    0b000001001000,
  474.    0b000001010000,
  475.    0b000001110000,
  476.    0b000001111000,
  477.    0b001100000000,
  478.    0b001100000010,
  479.    0b001100001000,
  480.    0b001100010000,
  481.    0b001100010010,
  482.    0b001100100000,
  483.    0b001100101000,
  484.    0b001100111000,
  485.    0b001101000000,
  486.    0b001101000010,
  487.    0b001101001000,
  488.    0b001101010000,
  489.    0b001101100000,
  490.    0b001101101000,
  491.    0b001101110000,
  492.    0b001101110001,
  493.    0b001101111000,
  494.    0b010001101000,
  495.    0b010001101001,
  496.    0b010001101010,
  497.    0b010110001000
  498. };
  499.  
  /* Control-index lookup table carrying the gen8_ prefix: 32 bit patterns,
   * each written out as 19 binary digits.  set_control_index() searches the
   * table that control_index_table points at and stores the position (0-31)
   * of the matching pattern in the compacted instruction.
   * NOTE(review): the code that points control_index_table at this array is
   * not in this part of the file -- confirm how it is selected.
   */
  500. static const uint32_t gen8_control_index_table[32] = {
  501.    0b0000000000000000010,
  502.    0b0000100000000000000,
  503.    0b0000100000000000001,
  504.    0b0000100000000000010,
  505.    0b0000100000000000011,
  506.    0b0000100000000000100,
  507.    0b0000100000000000101,
  508.    0b0000100000000000111,
  509.    0b0000100000000001000,
  510.    0b0000100000000001001,
  511.    0b0000100000000001101,
  512.    0b0000110000000000000,
  513.    0b0000110000000000001,
  514.    0b0000110000000000010,
  515.    0b0000110000000000011,
  516.    0b0000110000000000100,
  517.    0b0000110000000000101,
  518.    0b0000110000000000111,
  519.    0b0000110000000001001,
  520.    0b0000110000000001101,
  521.    0b0000110000000010000,
  522.    0b0000110000100000000,
  523.    0b0001000000000000000,
  524.    0b0001000000000000010,
  525.    0b0001000000000000100,
  526.    0b0001000000100000000,
  527.    0b0010110000000000000,
  528.    0b0010110000000010000,
  529.    0b0011000000000000000,
  530.    0b0011000000100000000,
  531.    0b0101000000000000000,
  532.    0b0101000000100000000
  533. };
  534.  
  /* Datatype-index lookup table carrying the gen8_ prefix: 32 bit patterns,
   * each written out as 21 binary digits.  set_datatype_index() searches the
   * table that datatype_table points at and stores the position (0-31) of
   * the matching pattern in the compacted instruction.
   * NOTE(review): the code that points datatype_table at this array is not
   * in this part of the file -- confirm how it is selected.
   */
  535. static const uint32_t gen8_datatype_table[32] = {
  536.    0b001000000000000000001,
  537.    0b001000000000001000000,
  538.    0b001000000000001000001,
  539.    0b001000000000011000001,
  540.    0b001000000000101011101,
  541.    0b001000000010111011101,
  542.    0b001000000011101000001,
  543.    0b001000000011101000101,
  544.    0b001000000011101011101,
  545.    0b001000001000001000001,
  546.    0b001000011000001000000,
  547.    0b001000011000001000001,
  548.    0b001000101000101000101,
  549.    0b001000111000101000100,
  550.    0b001000111000101000101,
  551.    0b001011100011101011101,
  552.    0b001011101011100011101,
  553.    0b001011101011101011100,
  554.    0b001011101011101011101,
  555.    0b001011111011101011100,
  556.    0b000000000010000001100,
  557.    0b001000000000001011101,
  558.    0b001000000000101000101,
  559.    0b001000001000001000000,
  560.    0b001000101000101000100,
  561.    0b001000111000100000100,
  562.    0b001001001001000001001,
  563.    0b001010111011101011101,
  564.    0b001011111011101011101,
  565.    0b001001111001101001100,
  566.    0b001001001001001001000,
  567.    0b001001011001001001000
  568. };
  569.  
  /* Subregister-index lookup table carrying the gen8_ prefix: 32 bit
   * patterns, each written out as 15 binary digits.  set_subreg_index()
   * searches the table that subreg_table points at and stores the position
   * (0-31) of the matching pattern in the compacted instruction.
   * NOTE(review): the code that points subreg_table at this array is not in
   * this part of the file -- confirm how it is selected.
   */
  570. static const uint16_t gen8_subreg_table[32] = {
  571.    0b000000000000000,
  572.    0b000000000000001,
  573.    0b000000000001000,
  574.    0b000000000001111,
  575.    0b000000000010000,
  576.    0b000000010000000,
  577.    0b000000100000000,
  578.    0b000000110000000,
  579.    0b000001000000000,
  580.    0b000001000010000,
  581.    0b000001010000000,
  582.    0b001000000000000,
  583.    0b001000000000001,
  584.    0b001000010000001,
  585.    0b001000010000010,
  586.    0b001000010000011,
  587.    0b001000010000100,
  588.    0b001000010000111,
  589.    0b001000010001000,
  590.    0b001000010001110,
  591.    0b001000010001111,
  592.    0b001000110000000,
  593.    0b001000111101000,
  594.    0b010000000000000,
  595.    0b010000110000000,
  596.    0b011000000000000,
  597.    0b011110010000111,
  598.    0b100000000000000,
  599.    0b101000000000000,
  600.    0b110000000000000,
  601.    0b111000000000000,
  602.    0b111000000011100
  603. };
  604.  
  /* Source-index lookup table carrying the gen8_ prefix: 32 bit patterns,
   * each written out as 12 binary digits.  get_src_index() searches the
   * table that src_index_table points at and reports the position (0-31) of
   * the matching pattern.
   * NOTE(review): the code that points src_index_table at this array is not
   * in this part of the file -- confirm how it is selected.
   */
  605. static const uint16_t gen8_src_index_table[32] = {
  606.    0b000000000000,
  607.    0b000000000010,
  608.    0b000000010000,
  609.    0b000000010010,
  610.    0b000000011000,
  611.    0b000000100000,
  612.    0b000000101000,
  613.    0b000001001000,
  614.    0b000001010000,
  615.    0b000001110000,
  616.    0b000001111000,
  617.    0b001100000000,
  618.    0b001100000010,
  619.    0b001100001000,
  620.    0b001100010000,
  621.    0b001100010010,
  622.    0b001100100000,
  623.    0b001100101000,
  624.    0b001100111000,
  625.    0b001101000000,
  626.    0b001101000010,
  627.    0b001101001000,
  628.    0b001101010000,
  629.    0b001101100000,
  630.    0b001101101000,
  631.    0b001101110000,
  632.    0b001101110001,
  633.    0b001101111000,
  634.    0b010001101000,
  635.    0b010001101001,
  636.    0b010001101010,
  637.    0b010110001000
  638. };
  639.  
  640. /* This is actually the control index table for Cherryview (26 bits), but the
  641.  * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
  642.  * the start.
  643.  *
  644.  * The low 24 bits have the same mappings on both hardware.
  645.  */
  /* Four entries, each written out as 26 binary digits.  Searched directly
   * by set_3src_control_index(), which stores the position (0-3) of the
   * matching entry in the compacted three-source instruction.
   */
  646. static const uint32_t gen8_3src_control_index_table[4] = {
  647.    0b00100000000110000000000001,
  648.    0b00000000000110000000000001,
  649.    0b00000000001000000000000001,
  650.    0b00000000001000000000100001
  651. };
  652.  
  653. /* This is actually the source index table for Cherryview (49 bits), but the
  654.  * only difference from Broadwell (46 bits) is that it has three extra 0-bits
  655.  * at the start.
  656.  *
  657.  * The low 44 bits have the same mappings on both hardware, and since the high
  658.  * three bits on Broadwell are zero, we can reuse Cherryview's table.
  659.  */
  /* Four entries, each written out as 49 binary digits (hence uint64_t).
   * Searched directly by set_3src_source_index(), which stores the position
   * (0-3) of the matching entry in the compacted three-source instruction.
   */
  660. static const uint64_t gen8_3src_source_index_table[4] = {
  661.    0b0000001110010011100100111001000001111000000000000,
  662.    0b0000001110010011100100111001000001111000000000010,
  663.    0b0000001110010011100100111001000001111000000001000,
  664.    0b0000001110010011100100111001000001111000000100000
  665. };
  666.  
  /* Lookup tables currently in effect.  The set_*_index() helpers and
   * get_src_index() read exactly 32 entries through these pointers.
   * NOTE(review): nothing in this part of the file assigns them; presumably
   * an initialisation routine points them at the per-generation tables above
   * before any compaction is attempted -- confirm, and confirm the
   * thread-safety expectations of that routine.
   */
  667. static const uint32_t *control_index_table;
  668. static const uint32_t *datatype_table;
  669. static const uint16_t *subreg_table;
  670. static const uint16_t *src_index_table;
  671.  
  672. static bool
  673. set_control_index(const struct brw_device_info *devinfo,
  674.                   brw_compact_inst *dst, brw_inst *src)
  675. {
  676.    uint32_t uncompacted = devinfo->gen >= 8  /* 17b/G45; 19b/IVB+ */
  677.       ? (brw_inst_bits(src, 33, 31) << 16) | /*  3b */
  678.         (brw_inst_bits(src, 23, 12) <<  4) | /* 12b */
  679.         (brw_inst_bits(src, 10,  9) <<  2) | /*  2b */
  680.         (brw_inst_bits(src, 34, 34) <<  1) | /*  1b */
  681.         (brw_inst_bits(src,  8,  8))         /*  1b */
  682.       : (brw_inst_bits(src, 31, 31) << 16) | /*  1b */
  683.         (brw_inst_bits(src, 23,  8));        /* 16b */
  684.  
  685.    /* On gen7, the flag register and subregister numbers are integrated into
  686.     * the control index.
  687.     */
  688.    if (devinfo->gen == 7)
  689.       uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
  690.  
  691.    for (int i = 0; i < 32; i++) {
  692.       if (control_index_table[i] == uncompacted) {
  693.          brw_compact_inst_set_control_index(dst, i);
  694.          return true;
  695.       }
  696.    }
  697.  
  698.    return false;
  699. }
  700.  
  701. static bool
  702. set_datatype_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
  703.                    brw_inst *src)
  704. {
  705.    uint32_t uncompacted = devinfo->gen >= 8  /* 18b/G45+; 21b/BDW+ */
  706.       ? (brw_inst_bits(src, 63, 61) << 18) | /*  3b */
  707.         (brw_inst_bits(src, 94, 89) << 12) | /*  6b */
  708.         (brw_inst_bits(src, 46, 35))         /* 12b */
  709.       : (brw_inst_bits(src, 63, 61) << 15) | /*  3b */
  710.         (brw_inst_bits(src, 46, 32));        /* 15b */
  711.  
  712.    for (int i = 0; i < 32; i++) {
  713.       if (datatype_table[i] == uncompacted) {
  714.          brw_compact_inst_set_datatype_index(dst, i);
  715.          return true;
  716.       }
  717.    }
  718.  
  719.    return false;
  720. }
  721.  
  722. static bool
  723. set_subreg_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
  724.                  brw_inst *src, bool is_immediate)
  725. {
  726.    uint16_t uncompacted =                 /* 15b */
  727.       (brw_inst_bits(src, 52, 48) << 0) | /*  5b */
  728.       (brw_inst_bits(src, 68, 64) << 5);  /*  5b */
  729.  
  730.    if (!is_immediate)
  731.       uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
  732.  
  733.    for (int i = 0; i < 32; i++) {
  734.       if (subreg_table[i] == uncompacted) {
  735.          brw_compact_inst_set_subreg_index(dst, i);
  736.          return true;
  737.       }
  738.    }
  739.  
  740.    return false;
  741. }
  742.  
  743. static bool
  744. get_src_index(uint16_t uncompacted,
  745.               uint16_t *compacted)
  746. {
  747.    for (int i = 0; i < 32; i++) {
  748.       if (src_index_table[i] == uncompacted) {
  749.          *compacted = i;
  750.          return true;
  751.       }
  752.    }
  753.  
  754.    return false;
  755. }
  756.  
  757. static bool
  758. set_src0_index(const struct brw_device_info *devinfo,
  759.                brw_compact_inst *dst, brw_inst *src)
  760. {
  761.    uint16_t compacted;
  762.    uint16_t uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
  763.  
  764.    if (!get_src_index(uncompacted, &compacted))
  765.       return false;
  766.  
  767.    brw_compact_inst_set_src0_index(dst, compacted);
  768.  
  769.    return true;
  770. }
  771.  
  772. static bool
  773. set_src1_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
  774.                brw_inst *src, bool is_immediate)
  775. {
  776.    uint16_t compacted;
  777.  
  778.    if (is_immediate) {
  779.       compacted = (brw_inst_imm_ud(devinfo, src) >> 8) & 0x1f;
  780.    } else {
  781.       uint16_t uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
  782.  
  783.       if (!get_src_index(uncompacted, &compacted))
  784.          return false;
  785.    }
  786.  
  787.    brw_compact_inst_set_src1_index(dst, compacted);
  788.  
  789.    return true;
  790. }
  791.  
  792. static bool
  793. set_3src_control_index(const struct brw_device_info *devinfo,
  794.                        brw_compact_inst *dst, brw_inst *src)
  795. {
  796.    assert(devinfo->gen >= 8);
  797.  
  798.    uint32_t uncompacted =                  /* 24b/BDW; 26b/CHV */
  799.       (brw_inst_bits(src, 34, 32) << 21) | /*  3b */
  800.       (brw_inst_bits(src, 28,  8));        /* 21b */
  801.  
  802.    if (devinfo->gen >= 9 || devinfo->is_cherryview)
  803.       uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */
  804.  
  805.    for (int i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
  806.       if (gen8_3src_control_index_table[i] == uncompacted) {
  807.          brw_compact_inst_set_3src_control_index(dst, i);
  808.          return true;
  809.       }
  810.    }
  811.  
  812.    return false;
  813. }
  814.  
  815. static bool
  816. set_3src_source_index(const struct brw_device_info *devinfo,
  817.                       brw_compact_inst *dst, brw_inst *src)
  818. {
  819.    assert(devinfo->gen >= 8);
  820.  
  821.    uint64_t uncompacted =                    /* 46b/BDW; 49b/CHV */
  822.       (brw_inst_bits(src,  83,  83) << 43) | /*  1b */
  823.       (brw_inst_bits(src, 114, 107) << 35) | /*  8b */
  824.       (brw_inst_bits(src,  93,  86) << 27) | /*  8b */
  825.       (brw_inst_bits(src,  72,  65) << 19) | /*  8b */
  826.       (brw_inst_bits(src,  55,  37));        /* 19b */
  827.  
  828.    if (devinfo->gen >= 9 || devinfo->is_cherryview) {
  829.       uncompacted |=
  830.          (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
  831.          (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
  832.          (brw_inst_bits(src,  84,  84) << 44);  /* 1b */
  833.    } else {
  834.       uncompacted |=
  835.          (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
  836.          (brw_inst_bits(src, 104, 104) << 44);  /* 1b */
  837.    }
  838.  
  839.    for (int i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
  840.       if (gen8_3src_source_index_table[i] == uncompacted) {
  841.          brw_compact_inst_set_3src_source_index(dst, i);
  842.          return true;
  843.       }
  844.    }
  845.  
  846.    return false;
  847. }
  848.  
  849. static bool
  850. has_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src)
  851. {
  852.    /* Check for instruction bits that don't map to any of the fields of the
  853.     * compacted instruction.  The instruction cannot be compacted if any of
  854.     * them are set.  They overlap with:
  855.     *  - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
  856.     *  - Dst.AddrImm[9] (bit 47 on Gen8)
  857.     *  - Src0.AddrImm[9] (bit 95 on Gen8)
  858.     *  - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8)
  859.     *  - UIP[31] (bit 95 on Gen8)
  860.     */
  861.    if (devinfo->gen >= 8) {
  862.       assert(!brw_inst_bits(src, 7,  7));
  863.       return brw_inst_bits(src, 95, 95) ||
  864.              brw_inst_bits(src, 47, 47) ||
  865.              brw_inst_bits(src, 11, 11);
  866.    } else {
  867.       assert(!brw_inst_bits(src, 7,  7) &&
  868.              !(devinfo->gen < 7 && brw_inst_bits(src, 90, 90)));
  869.       return brw_inst_bits(src, 95, 91) ||
  870.              brw_inst_bits(src, 47, 47);
  871.    }
  872. }
  873.  
  874. static bool
  875. has_3src_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src)
  876. {
  877.    /* Check for three-source instruction bits that don't map to any of the
  878.     * fields of the compacted instruction.  All of them seem to be reserved
  879.     * bits currently.
  880.     */
  881.    if (devinfo->gen >= 9 || devinfo->is_cherryview) {
  882.       assert(!brw_inst_bits(src, 127, 127) &&
  883.              !brw_inst_bits(src, 7,  7));
  884.    } else {
  885.       assert(devinfo->gen >= 8);
  886.       assert(!brw_inst_bits(src, 127, 126) &&
  887.              !brw_inst_bits(src, 105, 105) &&
  888.              !brw_inst_bits(src, 84, 84) &&
  889.              !brw_inst_bits(src, 36, 35) &&
  890.              !brw_inst_bits(src, 7,  7));
  891.    }
  892.  
  893.    return false;
  894. }
  895.  
  896. static bool
  897. brw_try_compact_3src_instruction(const struct brw_device_info *devinfo,
  898.                                  brw_compact_inst *dst, brw_inst *src)
  899. {
  900.    assert(devinfo->gen >= 8);
  901.  
  902.    if (has_3src_unmapped_bits(devinfo, src))
  903.       return false;
  904.  
  905. #define compact(field) \
  906.    brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(devinfo, src))
  907.  
  908.    compact(opcode);
  909.  
  910.    if (!set_3src_control_index(devinfo, dst, src))
  911.       return false;
  912.  
  913.    if (!set_3src_source_index(devinfo, dst, src))
  914.       return false;
  915.  
  916.    compact(dst_reg_nr);
  917.    compact(src0_rep_ctrl);
  918.    brw_compact_inst_set_3src_cmpt_control(dst, true);
  919.    compact(debug_control);
  920.    compact(saturate);
  921.    compact(src1_rep_ctrl);
  922.    compact(src2_rep_ctrl);
  923.    compact(src0_reg_nr);
  924.    compact(src1_reg_nr);
  925.    compact(src2_reg_nr);
  926.    compact(src0_subreg_nr);
  927.    compact(src1_subreg_nr);
  928.    compact(src2_subreg_nr);
  929.  
  930. #undef compact
  931.  
  932.    return true;
  933. }
  934.  
  /* A compacted instruction stores the low 12 bits of an immediate verbatim
   * plus a 13th bit that is replicated through the upper 20 bits.
   *
   * An immediate is therefore compactable exactly when its upper 20 bits are
   * all zeros or all ones.  In practice that covers 12-bit integers, 0.0f and
   * some limited uses of packed vectors.
   */
  static bool
  is_compactable_immediate(unsigned imm)
  {
     /* Drop the 12 bits that are carried as-is; what remains must be the
      * replicated bit repeated 20 times.
      */
     const unsigned upper = imm >> 12;

     return upper == 0 || upper == 0xfffff;
  }
  950.  
  951. /* Returns whether an opcode takes three sources. */
  952. static bool
  953. is_3src(uint32_t op)
  954. {
  955.    return opcode_descs[op].nsrc == 3;
  956. }
  957.  
  958. /**
  959.  * Tries to compact instruction src into dst.
  960.  *
  961.  * It doesn't modify dst unless src is compactable, which is relied on by
  962.  * brw_compact_instructions().
  963.  */
  964. bool
  965. brw_try_compact_instruction(const struct brw_device_info *devinfo,
  966.                             brw_compact_inst *dst, brw_inst *src)
  967. {
  968.    brw_compact_inst temp;
  969.  
  970.    assert(brw_inst_cmpt_control(devinfo, src) == 0);
  971.  
  972.    if (is_3src(brw_inst_opcode(devinfo, src))) {
  973.       if (devinfo->gen >= 8) {
  974.          memset(&temp, 0, sizeof(temp));
  975.          if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {
  976.             *dst = temp;
  977.             return true;
  978.          } else {
  979.             return false;
  980.          }
  981.       } else {
  982.          return false;
  983.       }
  984.    }
  985.  
  986.    bool is_immediate =
  987.       brw_inst_src0_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE ||
  988.       brw_inst_src1_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE;
  989.    if (is_immediate &&
  990.        (devinfo->gen < 6 ||
  991.         !is_compactable_immediate(brw_inst_imm_ud(devinfo, src)))) {
  992.       return false;
  993.    }
  994.  
  995.    if (has_unmapped_bits(devinfo, src))
  996.       return false;
  997.  
  998.    memset(&temp, 0, sizeof(temp));
  999.  
  1000.    brw_compact_inst_set_opcode(&temp, brw_inst_opcode(devinfo, src));
  1001.    brw_compact_inst_set_debug_control(&temp, brw_inst_debug_control(devinfo, src));
  1002.    if (!set_control_index(devinfo, &temp, src))
  1003.       return false;
  1004.    if (!set_datatype_index(devinfo, &temp, src))
  1005.       return false;
  1006.    if (!set_subreg_index(devinfo, &temp, src, is_immediate))
  1007.       return false;
  1008.    brw_compact_inst_set_acc_wr_control(&temp,
  1009.                                        brw_inst_acc_wr_control(devinfo, src));
  1010.    brw_compact_inst_set_cond_modifier(&temp,
  1011.                                       brw_inst_cond_modifier(devinfo, src));
  1012.    if (devinfo->gen <= 6)
  1013.       brw_compact_inst_set_flag_subreg_nr(&temp,
  1014.                                           brw_inst_flag_subreg_nr(devinfo, src));
  1015.    brw_compact_inst_set_cmpt_control(&temp, true);
  1016.    if (!set_src0_index(devinfo, &temp, src))
  1017.       return false;
  1018.    if (!set_src1_index(devinfo, &temp, src, is_immediate))
  1019.       return false;
  1020.    brw_compact_inst_set_dst_reg_nr(&temp, brw_inst_dst_da_reg_nr(devinfo, src));
  1021.    brw_compact_inst_set_src0_reg_nr(&temp, brw_inst_src0_da_reg_nr(devinfo, src));
  1022.    if (is_immediate) {
  1023.       brw_compact_inst_set_src1_reg_nr(&temp,
  1024.                                        brw_inst_imm_ud(devinfo, src) & 0xff);
  1025.    } else {
  1026.       brw_compact_inst_set_src1_reg_nr(&temp,
  1027.                                        brw_inst_src1_da_reg_nr(devinfo, src));
  1028.    }
  1029.  
  1030.    *dst = temp;
  1031.  
  1032.    return true;
  1033. }
  1034.  
  1035. static void
  1036. set_uncompacted_control(const struct brw_device_info *devinfo, brw_inst *dst,
  1037.                         brw_compact_inst *src)
  1038. {
  1039.    uint32_t uncompacted =
  1040.       control_index_table[brw_compact_inst_control_index(src)];
  1041.  
  1042.    if (devinfo->gen >= 8) {
  1043.       brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
  1044.       brw_inst_set_bits(dst, 23, 12, (uncompacted >>  4) & 0xfff);
  1045.       brw_inst_set_bits(dst, 10,  9, (uncompacted >>  2) & 0x3);
  1046.       brw_inst_set_bits(dst, 34, 34, (uncompacted >>  1) & 0x1);
  1047.       brw_inst_set_bits(dst,  8,  8, (uncompacted >>  0) & 0x1);
  1048.    } else {
  1049.       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
  1050.       brw_inst_set_bits(dst, 23,  8, (uncompacted & 0xffff));
  1051.  
  1052.       if (devinfo->gen == 7)
  1053.          brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
  1054.    }
  1055. }
  1056.  
  1057. static void
  1058. set_uncompacted_datatype(const struct brw_device_info *devinfo, brw_inst *dst,
  1059.                          brw_compact_inst *src)
  1060. {
  1061.    uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)];
  1062.  
  1063.    if (devinfo->gen >= 8) {
  1064.       brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
  1065.       brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
  1066.       brw_inst_set_bits(dst, 46, 35, (uncompacted >>  0) & 0xfff);
  1067.    } else {
  1068.       brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
  1069.       brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
  1070.    }
  1071. }
  1072.  
  1073. static void
  1074. set_uncompacted_subreg(const struct brw_device_info *devinfo, brw_inst *dst,
  1075.                        brw_compact_inst *src)
  1076. {
  1077.    uint16_t uncompacted = subreg_table[brw_compact_inst_subreg_index(src)];
  1078.  
  1079.    brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
  1080.    brw_inst_set_bits(dst,  68, 64, (uncompacted >>  5) & 0x1f);
  1081.    brw_inst_set_bits(dst,  52, 48, (uncompacted >>  0) & 0x1f);
  1082. }
  1083.  
  1084. static void
  1085. set_uncompacted_src0(const struct brw_device_info *devinfo, brw_inst *dst,
  1086.                      brw_compact_inst *src)
  1087. {
  1088.    uint32_t compacted = brw_compact_inst_src0_index(src);
  1089.    uint16_t uncompacted = src_index_table[compacted];
  1090.  
  1091.    brw_inst_set_bits(dst, 88, 77, uncompacted);
  1092. }
  1093.  
  1094. static void
  1095. set_uncompacted_src1(const struct brw_device_info *devinfo, brw_inst *dst,
  1096.                      brw_compact_inst *src, bool is_immediate)
  1097. {
  1098.    if (is_immediate) {
  1099.       signed high5 = brw_compact_inst_src1_index(src);
  1100.       /* Replicate top bit of src1_index into high 20 bits of the immediate. */
  1101.       brw_inst_set_imm_ud(devinfo, dst, (high5 << 27) >> 19);
  1102.    } else {
  1103.       uint16_t uncompacted = src_index_table[brw_compact_inst_src1_index(src)];
  1104.  
  1105.       brw_inst_set_bits(dst, 120, 109, uncompacted);
  1106.    }
  1107. }
  1108.  
  1109. static void
  1110. set_uncompacted_3src_control_index(const struct brw_device_info *devinfo,
  1111.                                    brw_inst *dst, brw_compact_inst *src)
  1112. {
  1113.    assert(devinfo->gen >= 8);
  1114.  
  1115.    uint32_t compacted = brw_compact_inst_3src_control_index(src);
  1116.    uint32_t uncompacted = gen8_3src_control_index_table[compacted];
  1117.  
  1118.    brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
  1119.    brw_inst_set_bits(dst, 28,  8, (uncompacted >>  0) & 0x1fffff);
  1120.  
  1121.    if (devinfo->gen >= 9 || devinfo->is_cherryview)
  1122.       brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
  1123. }
  1124.  
  1125. static void
  1126. set_uncompacted_3src_source_index(const struct brw_device_info *devinfo,
  1127.                                   brw_inst *dst, brw_compact_inst *src)
  1128. {
  1129.    assert(devinfo->gen >= 8);
  1130.  
  1131.    uint32_t compacted = brw_compact_inst_3src_source_index(src);
  1132.    uint64_t uncompacted = gen8_3src_source_index_table[compacted];
  1133.  
  1134.    brw_inst_set_bits(dst,  83,  83, (uncompacted >> 43) & 0x1);
  1135.    brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
  1136.    brw_inst_set_bits(dst,  93,  86, (uncompacted >> 27) & 0xff);
  1137.    brw_inst_set_bits(dst,  72,  65, (uncompacted >> 19) & 0xff);
  1138.    brw_inst_set_bits(dst,  55,  37, (uncompacted >>  0) & 0x7ffff);
  1139.  
  1140.    if (devinfo->gen >= 9 || devinfo->is_cherryview) {
  1141.       brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
  1142.       brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
  1143.       brw_inst_set_bits(dst,  84,  84, (uncompacted >> 44) & 0x1);
  1144.    } else {
  1145.       brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
  1146.       brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
  1147.    }
  1148. }
  1149.  
  1150. static void
  1151. brw_uncompact_3src_instruction(const struct brw_device_info *devinfo,
  1152.                                brw_inst *dst, brw_compact_inst *src)
  1153. {
  1154.    assert(devinfo->gen >= 8);
  1155.  
  1156. #define uncompact(field) \
  1157.    brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(src))
  1158.  
  1159.    uncompact(opcode);
  1160.  
  1161.    set_uncompacted_3src_control_index(devinfo, dst, src);
  1162.    set_uncompacted_3src_source_index(devinfo, dst, src);
  1163.  
  1164.    uncompact(dst_reg_nr);
  1165.    uncompact(src0_rep_ctrl);
  1166.    brw_inst_set_3src_cmpt_control(devinfo, dst, false);
  1167.    uncompact(debug_control);
  1168.    uncompact(saturate);
  1169.    uncompact(src1_rep_ctrl);
  1170.    uncompact(src2_rep_ctrl);
  1171.    uncompact(src0_reg_nr);
  1172.    uncompact(src1_reg_nr);
  1173.    uncompact(src2_reg_nr);
  1174.    uncompact(src0_subreg_nr);
  1175.    uncompact(src1_subreg_nr);
  1176.    uncompact(src2_subreg_nr);
  1177.  
  1178. #undef uncompact
  1179. }
  1180.  
  1181. void
  1182. brw_uncompact_instruction(const struct brw_device_info *devinfo, brw_inst *dst,
  1183.                           brw_compact_inst *src)
  1184. {
  1185.    memset(dst, 0, sizeof(*dst));
  1186.  
  1187.    if (devinfo->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(src))) {
  1188.       brw_uncompact_3src_instruction(devinfo, dst, src);
  1189.       return;
  1190.    }
  1191.  
  1192.    brw_inst_set_opcode(devinfo, dst, brw_compact_inst_opcode(src));
  1193.    brw_inst_set_debug_control(devinfo, dst, brw_compact_inst_debug_control(src));
  1194.  
  1195.    set_uncompacted_control(devinfo, dst, src);
  1196.    set_uncompacted_datatype(devinfo, dst, src);
  1197.  
  1198.    /* src0/1 register file fields are in the datatype table. */
  1199.    bool is_immediate = brw_inst_src0_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE ||
  1200.                        brw_inst_src1_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE;
  1201.  
  1202.    set_uncompacted_subreg(devinfo, dst, src);
  1203.    brw_inst_set_acc_wr_control(devinfo, dst, brw_compact_inst_acc_wr_control(src));
  1204.    brw_inst_set_cond_modifier(devinfo, dst, brw_compact_inst_cond_modifier(src));
  1205.    if (devinfo->gen <= 6)
  1206.       brw_inst_set_flag_subreg_nr(devinfo, dst,
  1207.                                   brw_compact_inst_flag_subreg_nr(src));
  1208.    set_uncompacted_src0(devinfo, dst, src);
  1209.    set_uncompacted_src1(devinfo, dst, src, is_immediate);
  1210.    brw_inst_set_dst_da_reg_nr(devinfo, dst, brw_compact_inst_dst_reg_nr(src));
  1211.    brw_inst_set_src0_da_reg_nr(devinfo, dst, brw_compact_inst_src0_reg_nr(src));
  1212.    if (is_immediate) {
  1213.       brw_inst_set_imm_ud(devinfo, dst,
  1214.                           brw_inst_imm_ud(devinfo, dst) |
  1215.                           brw_compact_inst_src1_reg_nr(src));
  1216.    } else {
  1217.       brw_inst_set_src1_da_reg_nr(devinfo, dst, brw_compact_inst_src1_reg_nr(src));
  1218.    }
  1219. }
  1220.  
  1221. void brw_debug_compact_uncompact(const struct brw_device_info *devinfo,
  1222.                                  brw_inst *orig,
  1223.                                  brw_inst *uncompacted)
  1224. {
  1225.    fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
  1226.            devinfo->gen);
  1227.  
  1228.    fprintf(stderr, "  before: ");
  1229.    brw_disassemble_inst(stderr, devinfo, orig, true);
  1230.  
  1231.    fprintf(stderr, "  after:  ");
  1232.    brw_disassemble_inst(stderr, devinfo, uncompacted, false);
  1233.  
  1234.    uint32_t *before_bits = (uint32_t *)orig;
  1235.    uint32_t *after_bits = (uint32_t *)uncompacted;
  1236.    fprintf(stderr, "  changed bits:\n");
  1237.    for (int i = 0; i < 128; i++) {
  1238.       uint32_t before = before_bits[i / 32] & (1 << (i & 31));
  1239.       uint32_t after = after_bits[i / 32] & (1 << (i & 31));
  1240.  
  1241.       if (before != after) {
  1242.          fprintf(stderr, "  bit %d, %s to %s\n", i,
  1243.                  before ? "set" : "unset",
  1244.                  after ? "set" : "unset");
  1245.       }
  1246.    }
  1247. }
  1248.  
  1249. static int
  1250. compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
  1251. {
  1252.    int this_compacted_count = compacted_counts[old_ip];
  1253.    int target_compacted_count = compacted_counts[old_target_ip];
  1254.    return target_compacted_count - this_compacted_count;
  1255. }
  1256.  
  1257. static void
  1258. update_uip_jip(const struct brw_device_info *devinfo, brw_inst *insn,
  1259.                int this_old_ip, int *compacted_counts)
  1260. {
  1261.    /* JIP and UIP are in units of:
  1262.     *    - bytes on Gen8+; and
  1263.     *    - compacted instructions on Gen6+.
  1264.     */
  1265.    int shift = devinfo->gen >= 8 ? 3 : 0;
  1266.  
  1267.    int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
  1268.    jip_compacted -= compacted_between(this_old_ip,
  1269.                                       this_old_ip + (jip_compacted / 2),
  1270.                                       compacted_counts);
  1271.    brw_inst_set_jip(devinfo, insn, jip_compacted << shift);
  1272.  
  1273.    if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF ||
  1274.        brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE ||
  1275.        (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && devinfo->gen <= 7))
  1276.       return;
  1277.  
  1278.    int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
  1279.    uip_compacted -= compacted_between(this_old_ip,
  1280.                                       this_old_ip + (uip_compacted / 2),
  1281.                                       compacted_counts);
  1282.    brw_inst_set_uip(devinfo, insn, uip_compacted << shift);
  1283. }
  1284.  
  1285. static void
  1286. update_gen4_jump_count(const struct brw_device_info *devinfo, brw_inst *insn,
  1287.                        int this_old_ip, int *compacted_counts)
  1288. {
  1289.    assert(devinfo->gen == 5 || devinfo->is_g4x);
  1290.  
  1291.    /* Jump Count is in units of:
  1292.     *    - uncompacted instructions on G45; and
  1293.     *    - compacted instructions on Gen5.
  1294.     */
  1295.    int shift = devinfo->is_g4x ? 1 : 0;
  1296.  
  1297.    int jump_count_compacted = brw_inst_gen4_jump_count(devinfo, insn) << shift;
  1298.  
  1299.    int target_old_ip = this_old_ip + (jump_count_compacted / 2);
  1300.  
  1301.    int this_compacted_count = compacted_counts[this_old_ip];
  1302.    int target_compacted_count = compacted_counts[target_old_ip];
  1303.  
  1304.    jump_count_compacted -= (target_compacted_count - this_compacted_count);
  1305.    brw_inst_set_gen4_jump_count(devinfo, insn, jump_count_compacted >> shift);
  1306. }
  1307.  
  1308. void
  1309. brw_init_compaction_tables(const struct brw_device_info *devinfo)
  1310. {
  1311.    static bool initialized;
  1312.    if (initialized || p_atomic_cmpxchg(&initialized, false, true) != false)
  1313.       return;
  1314.  
  1315.    assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
  1316.    assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
  1317.    assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
  1318.    assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
  1319.    assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
  1320.    assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
  1321.    assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
  1322.    assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
  1323.    assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
  1324.    assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
  1325.    assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
  1326.    assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
  1327.    assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0);
  1328.    assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
  1329.    assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
  1330.    assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
  1331.  
  1332.    switch (devinfo->gen) {
  1333.    case 9:
  1334.    case 8:
  1335.       control_index_table = gen8_control_index_table;
  1336.       datatype_table = gen8_datatype_table;
  1337.       subreg_table = gen8_subreg_table;
  1338.       src_index_table = gen8_src_index_table;
  1339.       break;
  1340.    case 7:
  1341.       control_index_table = gen7_control_index_table;
  1342.       datatype_table = gen7_datatype_table;
  1343.       subreg_table = gen7_subreg_table;
  1344.       src_index_table = gen7_src_index_table;
  1345.       break;
  1346.    case 6:
  1347.       control_index_table = gen6_control_index_table;
  1348.       datatype_table = gen6_datatype_table;
  1349.       subreg_table = gen6_subreg_table;
  1350.       src_index_table = gen6_src_index_table;
  1351.       break;
  1352.    case 5:
  1353.    case 4:
  1354.       control_index_table = g45_control_index_table;
  1355.       datatype_table = g45_datatype_table;
  1356.       subreg_table = g45_subreg_table;
  1357.       src_index_table = g45_src_index_table;
  1358.       break;
  1359.    default:
  1360.       unreachable("unknown generation");
  1361.    }
  1362. }
  1363.  
  1364. void
  1365. brw_compact_instructions(struct brw_codegen *p, int start_offset,
  1366.                          int num_annotations, struct annotation *annotation)
  1367. {
  1368.    const struct brw_device_info *devinfo = p->devinfo;
  1369.    void *store = p->store + start_offset / 16;
  1370.    /* For an instruction at byte offset 16*i before compaction, this is the
  1371.     * number of compacted instructions minus the number of padding NOP/NENOPs
  1372.     * that preceded it.
  1373.     */
  1374.    int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
  1375.    /* For an instruction at byte offset 8*i after compaction, this was its IP
  1376.     * (in 16-byte units) before compaction.
  1377.     */
  1378.    int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst)];
  1379.  
  1380.    if (devinfo->gen == 4 && !devinfo->is_g4x)
  1381.       return;
  1382.  
  1383.    int offset = 0;
  1384.    int compacted_count = 0;
  1385.    for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
  1386.         src_offset += sizeof(brw_inst)) {
  1387.       brw_inst *src = store + src_offset;
  1388.       void *dst = store + offset;
  1389.  
  1390.       old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
  1391.       compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
  1392.  
  1393.       brw_inst saved = *src;
  1394.  
  1395.       if (brw_try_compact_instruction(devinfo, dst, src)) {
  1396.          compacted_count++;
  1397.  
  1398.          if (INTEL_DEBUG) {
  1399.             brw_inst uncompacted;
  1400.             brw_uncompact_instruction(devinfo, &uncompacted, dst);
  1401.             if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
  1402.                brw_debug_compact_uncompact(devinfo, &saved, &uncompacted);
  1403.             }
  1404.          }
  1405.  
  1406.          offset += sizeof(brw_compact_inst);
  1407.       } else {
  1408.          /* All uncompacted instructions need to be aligned on G45. */
  1409.          if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){
  1410.             brw_compact_inst *align = store + offset;
  1411.             memset(align, 0, sizeof(*align));
  1412.             brw_compact_inst_set_opcode(align, BRW_OPCODE_NENOP);
  1413.             brw_compact_inst_set_cmpt_control(align, true);
  1414.             offset += sizeof(brw_compact_inst);
  1415.             compacted_count--;
  1416.             compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
  1417.             old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
  1418.  
  1419.             dst = store + offset;
  1420.          }
  1421.  
  1422.          /* If we didn't compact this intruction, we need to move it down into
  1423.           * place.
  1424.           */
  1425.          if (offset != src_offset) {
  1426.             memmove(dst, src, sizeof(brw_inst));
  1427.          }
  1428.          offset += sizeof(brw_inst);
  1429.       }
  1430.    }
  1431.  
  1432.    /* Fix up control flow offsets. */
  1433.    p->next_insn_offset = start_offset + offset;
  1434.    for (offset = 0; offset < p->next_insn_offset - start_offset;
  1435.         offset = next_offset(devinfo, store, offset)) {
  1436.       brw_inst *insn = store + offset;
  1437.       int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
  1438.       int this_compacted_count = compacted_counts[this_old_ip];
  1439.  
  1440.       switch (brw_inst_opcode(devinfo, insn)) {
  1441.       case BRW_OPCODE_BREAK:
  1442.       case BRW_OPCODE_CONTINUE:
  1443.       case BRW_OPCODE_HALT:
  1444.          if (devinfo->gen >= 6) {
  1445.             update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
  1446.          } else {
  1447.             update_gen4_jump_count(devinfo, insn, this_old_ip,
  1448.                                    compacted_counts);
  1449.          }
  1450.          break;
  1451.  
  1452.       case BRW_OPCODE_IF:
  1453.       case BRW_OPCODE_IFF:
  1454.       case BRW_OPCODE_ELSE:
  1455.       case BRW_OPCODE_ENDIF:
  1456.       case BRW_OPCODE_WHILE:
  1457.          if (devinfo->gen >= 7) {
  1458.             if (brw_inst_cmpt_control(devinfo, insn)) {
  1459.                brw_inst uncompacted;
  1460.                brw_uncompact_instruction(devinfo, &uncompacted,
  1461.                                          (brw_compact_inst *)insn);
  1462.  
  1463.                update_uip_jip(devinfo, &uncompacted, this_old_ip,
  1464.                               compacted_counts);
  1465.  
  1466.                bool ret = brw_try_compact_instruction(devinfo,
  1467.                                                       (brw_compact_inst *)insn,
  1468.                                                       &uncompacted);
  1469.                assert(ret); (void)ret;
  1470.             } else {
  1471.                update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
  1472.             }
  1473.          } else if (devinfo->gen == 6) {
  1474.             assert(!brw_inst_cmpt_control(devinfo, insn));
  1475.  
  1476.             /* Jump Count is in units of compacted instructions on Gen6. */
  1477.             int jump_count_compacted = brw_inst_gen6_jump_count(devinfo, insn);
  1478.  
  1479.             int target_old_ip = this_old_ip + (jump_count_compacted / 2);
  1480.             int target_compacted_count = compacted_counts[target_old_ip];
  1481.             jump_count_compacted -= (target_compacted_count - this_compacted_count);
  1482.             brw_inst_set_gen6_jump_count(devinfo, insn, jump_count_compacted);
  1483.          } else {
  1484.             update_gen4_jump_count(devinfo, insn, this_old_ip,
  1485.                                    compacted_counts);
  1486.          }
  1487.          break;
  1488.  
  1489.       case BRW_OPCODE_ADD:
  1490.          /* Add instructions modifying the IP register use an immediate src1,
  1491.           * and Gens that use this cannot compact instructions with immediate
  1492.           * operands.
  1493.           */
  1494.          if (brw_inst_cmpt_control(devinfo, insn))
  1495.             break;
  1496.  
  1497.          if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
  1498.              brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
  1499.             assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);
  1500.  
  1501.             int shift = 3;
  1502.             int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
  1503.  
  1504.             int target_old_ip = this_old_ip + (jump_compacted / 2);
  1505.             int target_compacted_count = compacted_counts[target_old_ip];
  1506.             jump_compacted -= (target_compacted_count - this_compacted_count);
  1507.             brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
  1508.          }
  1509.          break;
  1510.       }
  1511.    }
  1512.  
  1513.    /* p->nr_insn is counting the number of uncompacted instructions still, so
  1514.     * divide.  We do want to be sure there's a valid instruction in any
  1515.     * alignment padding, so that the next compression pass (for the FS 8/16
  1516.     * compile passes) parses correctly.
  1517.     */
  1518.    if (p->next_insn_offset & sizeof(brw_compact_inst)) {
  1519.       brw_compact_inst *align = store + offset;
  1520.       memset(align, 0, sizeof(*align));
  1521.       brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
  1522.       brw_compact_inst_set_cmpt_control(align, true);
  1523.       p->next_insn_offset += sizeof(brw_compact_inst);
  1524.    }
  1525.    p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
  1526.  
  1527.    /* Update the instruction offsets for each annotation. */
  1528.    if (annotation) {
  1529.       for (int offset = 0, i = 0; i < num_annotations; i++) {
  1530.          while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
  1531.                 sizeof(brw_inst) != annotation[i].offset) {
  1532.             assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
  1533.                    sizeof(brw_inst) < annotation[i].offset);
  1534.             offset = next_offset(devinfo, store, offset);
  1535.          }
  1536.  
  1537.          annotation[i].offset = start_offset + offset;
  1538.  
  1539.          offset = next_offset(devinfo, store, offset);
  1540.       }
  1541.  
  1542.       annotation[num_annotations].offset = p->next_insn_offset;
  1543.    }
  1544. }
  1545.