Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /**
  29.  * AA point stage:  AA points are converted to quads and rendered with a
  30.  * special fragment shader.  Another approach would be to use a texture
  31.  * map image of a point, but experiments indicate the quality isn't nearly
  32.  * as good as this approach.
  33.  *
  34.  * Note: this looks a lot like draw_aaline.c but there's actually little
  35.  * if any code that can be shared.
  36.  *
  37.  * Authors:  Brian Paul
  38.  */
  39.  
  40.  
  41. #include "pipe/p_context.h"
  42. #include "pipe/p_defines.h"
  43. #include "pipe/p_shader_tokens.h"
  44.  
  45. #include "tgsi/tgsi_transform.h"
  46. #include "tgsi/tgsi_dump.h"
  47.  
  48. #include "util/u_math.h"
  49. #include "util/u_memory.h"
  50.  
  51. #include "draw_context.h"
  52. #include "draw_vs.h"
  53. #include "draw_pipe.h"
  54.  
  55.  
  56. /** Approx number of new tokens for instructions in aa_transform_inst() */
  57. #define NUM_NEW_TOKENS 200
  58.  
  59.  
  60. /*
  61.  * Enabling NORMALIZE might give _slightly_ better results.
  62.  * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
  63.  * d=x*x+y*y.  Since we're working with a unit circle, the latter seems
  64.  * close enough and saves some costly instructions.
  65.  */
  66. #define NORMALIZE 0
  67.  
  68.  
  69. /**
  70.  * Subclass of pipe_shader_state to carry extra fragment shader info.
  71.  */
  72. struct aapoint_fragment_shader
  73. {
  74.    struct pipe_shader_state state;
  75.    void *driver_fs;   /**< the regular shader */
  76.    void *aapoint_fs;  /**< the aa point-augmented shader */
  77.    int generic_attrib; /**< The generic input attrib/texcoord we'll use */
  78. };
  79.  
  80.  
  81. /**
  82.  * Subclass of draw_stage
  83.  */
  84. struct aapoint_stage
  85. {
  86.    struct draw_stage stage;
  87.  
  88.    /** half of pipe_rasterizer_state::point_size */
  89.    float radius;
  90.  
  91.    /** vertex attrib slot containing point size */
  92.    int psize_slot;
  93.  
  94.    /** this is the vertex attrib slot for the new texcoords */
  95.    uint tex_slot;
  96.  
  97.    /** vertex attrib slot containing position */
  98.    uint pos_slot;
  99.  
  100.    /** Currently bound fragment shader */
  101.    struct aapoint_fragment_shader *fs;
  102.  
  103.    /*
  104.     * Driver interface/override functions
  105.     */
  106.    void * (*driver_create_fs_state)(struct pipe_context *,
  107.                                     const struct pipe_shader_state *);
  108.    void (*driver_bind_fs_state)(struct pipe_context *, void *);
  109.    void (*driver_delete_fs_state)(struct pipe_context *, void *);
  110. };
  111.  
  112.  
  113.  
  114. /**
  115.  * Subclass of tgsi_transform_context, used for transforming the
  116.  * user's fragment shader to add the special AA instructions.
  117.  */
  118. struct aa_transform_context {
  119.    struct tgsi_transform_context base;
  120.    uint tempsUsed;  /**< bitmask */
  121.    int colorOutput; /**< which output is the primary color */
  122.    int maxInput, maxGeneric;  /**< max input index found */
  123.    int tmp0, colorTemp;  /**< temp registers */
  124.    boolean firstInstruction;
  125. };
  126.  
  127.  
  128. /**
  129.  * TGSI declaration transform callback.
  130.  * Look for two free temp regs and available input reg for new texcoords.
  131.  */
  132. static void
  133. aa_transform_decl(struct tgsi_transform_context *ctx,
  134.                   struct tgsi_full_declaration *decl)
  135. {
  136.    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
  137.  
  138.    if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
  139.        decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
  140.        decl->Semantic.Index == 0) {
  141.       aactx->colorOutput = decl->Range.First;
  142.    }
  143.    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
  144.       if ((int) decl->Range.Last > aactx->maxInput)
  145.          aactx->maxInput = decl->Range.Last;
  146.       if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
  147.            (int) decl->Semantic.Index > aactx->maxGeneric) {
  148.          aactx->maxGeneric = decl->Semantic.Index;
  149.       }
  150.    }
  151.    else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
  152.       uint i;
  153.       for (i = decl->Range.First;
  154.            i <= decl->Range.Last; i++) {
  155.          aactx->tempsUsed |= (1 << i);
  156.       }
  157.    }
  158.  
  159.    ctx->emit_declaration(ctx, decl);
  160. }
  161.  
  162.  
  163. /**
  164.  * TGSI instruction transform callback.
  165.  * Replace writes to result.color w/ a temp reg.
  166.  * Upon END instruction, insert texture sampling code for antialiasing.
  167.  */
  168. static void
  169. aa_transform_inst(struct tgsi_transform_context *ctx,
  170.                   struct tgsi_full_instruction *inst)
  171. {
  172.    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
  173.    struct tgsi_full_instruction newInst;
  174.  
  175.    if (aactx->firstInstruction) {
  176.       /* emit our new declarations before the first instruction */
  177.  
  178.       struct tgsi_full_declaration decl;
  179.       const int texInput = aactx->maxInput + 1;
  180.       int tmp0;
  181.       uint i;
  182.  
  183.       /* find two free temp regs */
  184.       for (i = 0; i < 32; i++) {
  185.          if ((aactx->tempsUsed & (1 << i)) == 0) {
  186.             /* found a free temp */
  187.             if (aactx->tmp0 < 0)
  188.                aactx->tmp0 = i;
  189.             else if (aactx->colorTemp < 0)
  190.                aactx->colorTemp = i;
  191.             else
  192.                break;
  193.          }
  194.       }
  195.  
  196.       assert(aactx->colorTemp != aactx->tmp0);
  197.  
  198.       tmp0 = aactx->tmp0;
  199.  
  200.       /* declare new generic input/texcoord */
  201.       decl = tgsi_default_full_declaration();
  202.       decl.Declaration.File = TGSI_FILE_INPUT;
  203.       /* XXX this could be linear... */
  204.       decl.Declaration.Interpolate = 1;
  205.       decl.Declaration.Semantic = 1;
  206.       decl.Semantic.Name = TGSI_SEMANTIC_GENERIC;
  207.       decl.Semantic.Index = aactx->maxGeneric + 1;
  208.       decl.Range.First =
  209.       decl.Range.Last = texInput;
  210.       decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
  211.       ctx->emit_declaration(ctx, &decl);
  212.  
  213.       /* declare new temp regs */
  214.       decl = tgsi_default_full_declaration();
  215.       decl.Declaration.File = TGSI_FILE_TEMPORARY;
  216.       decl.Range.First =
  217.       decl.Range.Last = tmp0;
  218.       ctx->emit_declaration(ctx, &decl);
  219.  
  220.       decl = tgsi_default_full_declaration();
  221.       decl.Declaration.File = TGSI_FILE_TEMPORARY;
  222.       decl.Range.First =
  223.       decl.Range.Last = aactx->colorTemp;
  224.       ctx->emit_declaration(ctx, &decl);
  225.  
  226.       aactx->firstInstruction = FALSE;
  227.  
  228.  
  229.       /*
  230.        * Emit code to compute fragment coverage, kill if outside point radius
  231.        *
  232.        * Temp reg0 usage:
  233.        *  t0.x = distance of fragment from center point
  234.        *  t0.y = boolean, is t0.x > 1.0, also misc temp usage
  235.        *  t0.z = temporary for computing 1/(1-k) value
  236.        *  t0.w = final coverage value
  237.        */
  238.  
  239.       /* MUL t0.xy, tex, tex;  # compute x^2, y^2 */
  240.       newInst = tgsi_default_full_instruction();
  241.       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
  242.       newInst.Instruction.NumDstRegs = 1;
  243.       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
  244.       newInst.Dst[0].Register.Index = tmp0;
  245.       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY;
  246.       newInst.Instruction.NumSrcRegs = 2;
  247.       newInst.Src[0].Register.File = TGSI_FILE_INPUT;
  248.       newInst.Src[0].Register.Index = texInput;
  249.       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
  250.       newInst.Src[1].Register.Index = texInput;
  251.       ctx->emit_instruction(ctx, &newInst);
  252.  
  253.       /* ADD t0.x, t0.x, t0.y;  # x^2 + y^2 */
  254.       newInst = tgsi_default_full_instruction();
  255.       newInst.Instruction.Opcode = TGSI_OPCODE_ADD;
  256.       newInst.Instruction.NumDstRegs = 1;
  257.       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
  258.       newInst.Dst[0].Register.Index = tmp0;
  259.       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
  260.       newInst.Instruction.NumSrcRegs = 2;
  261.       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
  262.       newInst.Src[0].Register.Index = tmp0;
  263.       newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
  264.       newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
  265.       newInst.Src[1].Register.Index = tmp0;
  266.       newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_Y;
  267.       ctx->emit_instruction(ctx, &newInst);
  268.  
  269. #if NORMALIZE  /* OPTIONAL normalization of length */
  270.       /* RSQ t0.x, t0.x; */
  271.       newInst = tgsi_default_full_instruction();
  272.       newInst.Instruction.Opcode = TGSI_OPCODE_RSQ;
  273.       newInst.Instruction.NumDstRegs = 1;
  274.       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
  275.       newInst.Dst[0].Register.Index = tmp0;
  276.       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
  277.       newInst.Instruction.NumSrcRegs = 1;
  278.       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
  279.       newInst.Src[0].Register.Index = tmp0;
  280.       ctx->emit_instruction(ctx, &newInst);
  281.  
  282.       /* RCP t0.x, t0.x; */
  283.       newInst = tgsi_default_full_instruction();
  284.       newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
  285.       newInst.Instruction.NumDstRegs = 1;
  286.       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
  287.       newInst.Dst[0].Register.Index = tmp0;
  288.       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
  289.       newInst.Instruction.NumSrcRegs = 1;
  290.       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
  291.       newInst.Src[0].Register.Index = tmp0;
  292.       ctx->emit_instruction(ctx, &newInst);
  293. #endif
  294.  
  295.       /* SGT t0.y, t0.xxxx, tex.wwww;  # bool b = d > 1 (NOTE tex.w == 1) */
  296.       newInst = tgsi_default_full_instruction();
  297.       newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
  298.       newInst.Instruction.NumDstRegs = 1;
  299.       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
  300.       newInst.Dst[0].Register.Index = tmp0;
  301.       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
  302.       newInst.Instruction.NumSrcRegs = 2;
  303.       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
  304.       newInst.Src[0].Register.Index = tmp0;
  305.       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
  306.       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
  307.       newInst.Src[1].Register.Index = texInput;
  308.       newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W;
  309.       ctx->emit_instruction(ctx, &newInst);
  310.  
  311.       /* KILL_IF -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
  312.       newInst = tgsi_default_full_instruction();
  313.       newInst.Instruction.Opcode = TGSI_OPCODE_KILL_IF;
  314.       newInst.Instruction.NumDstRegs = 0;
  315.       newInst.Instruction.NumSrcRegs = 1;
  316.       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
  317.       newInst.Src[0].Register.Index = tmp0;
  318.       newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y;
  319.       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
  320.       newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y;
  321.       newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
  322.       newInst.Src[0].Register.Negate = 1;
  323.       ctx->emit_instruction(ctx, &newInst);
  324.  
  325.  
  326.       /* compute coverage factor = (1-d)/(1-k) */
  327.  
  328.       /* SUB t0.z, tex.w, tex.z;  # m = 1 - k */
  329.       newInst = tgsi_default_full_instruction();
  330.       newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
  331.       newInst.Instruction.NumDstRegs = 1;
  332.       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
  333.       newInst.Dst[0].Register.Index = tmp0;
  334.       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
  335.       newInst.Instruction.NumSrcRegs = 2;
  336.       newInst.Src[0].Register.File = TGSI_FILE_INPUT;
  337.       newInst.Src[0].Register.Index = texInput;
  338.       newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_W;
  339.       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
  340.       newInst.Src[1].Register.Index = texInput;
  341.       newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
  342.       ctx->emit_instruction(ctx, &newInst);
  343.  
  344.       /* RCP t0.z, t0.z;  # t0.z = 1 / m */
  345.       newInst = tgsi_default_full_instruction();
  346.       newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
  347.       newInst.Instruction.NumDstRegs = 1;
  348.       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
  349.       newInst.Dst[0].Register.Index = tmp0;
  350.       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
  351.       newInst.Instruction.NumSrcRegs = 1;
  352.       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
  353.       newInst.Src[0].Register.Index = tmp0;
  354.       newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z;
  355.       ctx->emit_instruction(ctx, &newInst);
  356.  
  357.       /* SUB t0.y, 1, t0.x;  # d = 1 - d */
  358.       newInst = tgsi_default_full_instruction();
  359.       newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
  360.       newInst.Instruction.NumDstRegs = 1;
  361.       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
  362.       newInst.Dst[0].Register.Index = tmp0;
  363.       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
  364.       newInst.Instruction.NumSrcRegs = 2;
  365.       newInst.Src[0].Register.File = TGSI_FILE_INPUT;
  366.       newInst.Src[0].Register.Index = texInput;
  367.       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_W;
  368.       newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
  369.       newInst.Src[1].Register.Index = tmp0;
  370.       newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_X;
  371.       ctx->emit_instruction(ctx, &newInst);
  372.  
  373.       /* MUL t0.w, t0.y, t0.z;   # coverage = d * m */
  374.       newInst = tgsi_default_full_instruction();
  375.       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
  376.       newInst.Instruction.NumDstRegs = 1;
  377.       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
  378.       newInst.Dst[0].Register.Index = tmp0;
  379.       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
  380.       newInst.Instruction.NumSrcRegs = 2;
  381.       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
  382.       newInst.Src[0].Register.Index = tmp0;
  383.       newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
  384.       newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
  385.       newInst.Src[1].Register.Index = tmp0;
  386.       newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_Z;
  387.       ctx->emit_instruction(ctx, &newInst);
  388.  
  389.       /* SLE t0.y, t0.x, tex.z;  # bool b = distance <= k */
  390.       newInst = tgsi_default_full_instruction();
  391.       newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
  392.       newInst.Instruction.NumDstRegs = 1;
  393.       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
  394.       newInst.Dst[0].Register.Index = tmp0;
  395.       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
  396.       newInst.Instruction.NumSrcRegs = 2;
  397.       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
  398.       newInst.Src[0].Register.Index = tmp0;
  399.       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
  400.       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
  401.       newInst.Src[1].Register.Index = texInput;
  402.       newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Z;
  403.       ctx->emit_instruction(ctx, &newInst);
  404.  
  405.       /* CMP t0.w, -t0.y, tex.w, t0.w;
  406.        *  # if -t0.y < 0 then
  407.        *       t0.w = 1
  408.        *    else
  409.        *       t0.w = t0.w
  410.        */
  411.       newInst = tgsi_default_full_instruction();
  412.       newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
  413.       newInst.Instruction.NumDstRegs = 1;
  414.       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
  415.       newInst.Dst[0].Register.Index = tmp0;
  416.       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
  417.       newInst.Instruction.NumSrcRegs = 3;
  418.       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
  419.       newInst.Src[0].Register.Index = tmp0;
  420.       newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y;
  421.       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
  422.       newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y;
  423.       newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
  424.       newInst.Src[0].Register.Negate = 1;
  425.       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
  426.       newInst.Src[1].Register.Index = texInput;
  427.       newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_W;
  428.       newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W;
  429.       newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_W;
  430.       newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
  431.       newInst.Src[2].Register.File = TGSI_FILE_TEMPORARY;
  432.       newInst.Src[2].Register.Index = tmp0;
  433.       newInst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_W;
  434.       newInst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_W;
  435.       newInst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_W;
  436.       newInst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
  437.       ctx->emit_instruction(ctx, &newInst);
  438.  
  439.    }
  440.  
  441.    if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
  442.       /* add alpha modulation code at tail of program */
  443.  
  444.       /* MOV result.color.xyz, colorTemp; */
  445.       newInst = tgsi_default_full_instruction();
  446.       newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
  447.       newInst.Instruction.NumDstRegs = 1;
  448.       newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
  449.       newInst.Dst[0].Register.Index = aactx->colorOutput;
  450.       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
  451.       newInst.Instruction.NumSrcRegs = 1;
  452.       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
  453.       newInst.Src[0].Register.Index = aactx->colorTemp;
  454.       ctx->emit_instruction(ctx, &newInst);
  455.  
  456.       /* MUL result.color.w, colorTemp, tmp0.w; */
  457.       newInst = tgsi_default_full_instruction();
  458.       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
  459.       newInst.Instruction.NumDstRegs = 1;
  460.       newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
  461.       newInst.Dst[0].Register.Index = aactx->colorOutput;
  462.       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
  463.       newInst.Instruction.NumSrcRegs = 2;
  464.       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
  465.       newInst.Src[0].Register.Index = aactx->colorTemp;
  466.       newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
  467.       newInst.Src[1].Register.Index = aactx->tmp0;
  468.       ctx->emit_instruction(ctx, &newInst);
  469.    }
  470.    else {
  471.       /* Not an END instruction.
  472.        * Look for writes to result.color and replace with colorTemp reg.
  473.        */
  474.       uint i;
  475.  
  476.       for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
  477.          struct tgsi_full_dst_register *dst = &inst->Dst[i];
  478.          if (dst->Register.File == TGSI_FILE_OUTPUT &&
  479.              dst->Register.Index == aactx->colorOutput) {
  480.             dst->Register.File = TGSI_FILE_TEMPORARY;
  481.             dst->Register.Index = aactx->colorTemp;
  482.          }
  483.       }
  484.    }
  485.  
  486.    ctx->emit_instruction(ctx, inst);
  487. }
  488.  
  489.  
  490. /**
  491.  * Generate the frag shader we'll use for drawing AA points.
  492.  * This will be the user's shader plus some texture/modulate instructions.
  493.  */
  494. static boolean
  495. generate_aapoint_fs(struct aapoint_stage *aapoint)
  496. {
  497.    const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
  498.    struct pipe_shader_state aapoint_fs;
  499.    struct aa_transform_context transform;
  500.    const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
  501.    struct pipe_context *pipe = aapoint->stage.draw->pipe;
  502.  
  503.    aapoint_fs = *orig_fs; /* copy to init */
  504.    aapoint_fs.tokens = tgsi_alloc_tokens(newLen);
  505.    if (aapoint_fs.tokens == NULL)
  506.       return FALSE;
  507.  
  508.    memset(&transform, 0, sizeof(transform));
  509.    transform.colorOutput = -1;
  510.    transform.maxInput = -1;
  511.    transform.maxGeneric = -1;
  512.    transform.colorTemp = -1;
  513.    transform.tmp0 = -1;
  514.    transform.firstInstruction = TRUE;
  515.    transform.base.transform_instruction = aa_transform_inst;
  516.    transform.base.transform_declaration = aa_transform_decl;
  517.  
  518.    tgsi_transform_shader(orig_fs->tokens,
  519.                          (struct tgsi_token *) aapoint_fs.tokens,
  520.                          newLen, &transform.base);
  521.  
  522. #if 0 /* DEBUG */
  523.    debug_printf("draw_aapoint, orig shader:\n");
  524.    tgsi_dump(orig_fs->tokens, 0);
  525.    debug_printf("draw_aapoint, new shader:\n");
  526.    tgsi_dump(aapoint_fs.tokens, 0);
  527. #endif
  528.  
  529.    aapoint->fs->aapoint_fs
  530.       = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
  531.    if (aapoint->fs->aapoint_fs == NULL)
  532.       goto fail;
  533.  
  534.    aapoint->fs->generic_attrib = transform.maxGeneric + 1;
  535.    FREE((void *)aapoint_fs.tokens);
  536.    return TRUE;
  537.  
  538. fail:
  539.    FREE((void *)aapoint_fs.tokens);
  540.    return FALSE;
  541. }
  542.  
  543.  
  544. /**
  545.  * When we're about to draw our first AA point in a batch, this function is
  546.  * called to tell the driver to bind our modified fragment shader.
  547.  */
  548. static boolean
  549. bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
  550. {
  551.    struct draw_context *draw = aapoint->stage.draw;
  552.    struct pipe_context *pipe = draw->pipe;
  553.  
  554.    if (!aapoint->fs->aapoint_fs &&
  555.        !generate_aapoint_fs(aapoint))
  556.       return FALSE;
  557.  
  558.    draw->suspend_flushing = TRUE;
  559.    aapoint->driver_bind_fs_state(pipe, aapoint->fs->aapoint_fs);
  560.    draw->suspend_flushing = FALSE;
  561.  
  562.    return TRUE;
  563. }
  564.  
  565.  
  566.  
  567. static INLINE struct aapoint_stage *
  568. aapoint_stage( struct draw_stage *stage )
  569. {
  570.    return (struct aapoint_stage *) stage;
  571. }
  572.  
  573.  
  574.  
  575.  
  576. /**
  577.  * Draw an AA point by drawing a quad.
  578.  */
  579. static void
  580. aapoint_point(struct draw_stage *stage, struct prim_header *header)
  581. {
  582.    const struct aapoint_stage *aapoint = aapoint_stage(stage);
  583.    struct prim_header tri;
  584.    struct vertex_header *v[4];
  585.    const uint tex_slot = aapoint->tex_slot;
  586.    const uint pos_slot = aapoint->pos_slot;
  587.    float radius, *pos, *tex;
  588.    uint i;
  589.    float k;
  590.  
  591.    if (aapoint->psize_slot >= 0) {
  592.       radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
  593.    }
  594.    else {
  595.       radius = aapoint->radius;
  596.    }
  597.  
  598.    /*
  599.     * Note: the texcoords (generic attrib, really) we use are special:
  600.     * The S and T components simply vary from -1 to +1.
  601.     * The R component is k, below.
  602.     * The Q component is 1.0 and will used as a handy constant in the
  603.     * fragment shader.
  604.     */
  605.  
  606.    /*
  607.     * k is the threshold distance from the point's center at which
  608.     * we begin alpha attenuation (the coverage value).
  609.     * Operating within a unit circle, we'll compute the fragment's
  610.     * distance 'd' from the center point using the texcoords.
  611.     * IF d > 1.0 THEN
  612.     *    KILL fragment
  613.     * ELSE IF d > k THEN
  614.     *    compute coverage in [0,1] proportional to d in [k, 1].
  615.     * ELSE
  616.     *    coverage = 1.0;  // full coverage
  617.     * ENDIF
  618.     *
  619.     * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
  620.     * avoid using IF/ELSE/ENDIF TGSI opcodes.
  621.     */
  622.  
  623. #if !NORMALIZE
  624.    k = 1.0f / radius;
  625.    k = 1.0f - 2.0f * k + k * k;
  626. #else
  627.    k = 1.0f - 1.0f / radius;
  628. #endif
  629.  
  630.    /* allocate/dup new verts */
  631.    for (i = 0; i < 4; i++) {
  632.       v[i] = dup_vert(stage, header->v[0], i);
  633.    }
  634.  
  635.    /* new verts */
  636.    pos = v[0]->data[pos_slot];
  637.    pos[0] -= radius;
  638.    pos[1] -= radius;
  639.  
  640.    pos = v[1]->data[pos_slot];
  641.    pos[0] += radius;
  642.    pos[1] -= radius;
  643.  
  644.    pos = v[2]->data[pos_slot];
  645.    pos[0] += radius;
  646.    pos[1] += radius;
  647.  
  648.    pos = v[3]->data[pos_slot];
  649.    pos[0] -= radius;
  650.    pos[1] += radius;
  651.  
  652.    /* new texcoords */
  653.    tex = v[0]->data[tex_slot];
  654.    ASSIGN_4V(tex, -1, -1, k, 1);
  655.  
  656.    tex = v[1]->data[tex_slot];
  657.    ASSIGN_4V(tex,  1, -1, k, 1);
  658.  
  659.    tex = v[2]->data[tex_slot];
  660.    ASSIGN_4V(tex,  1,  1, k, 1);
  661.  
  662.    tex = v[3]->data[tex_slot];
  663.    ASSIGN_4V(tex, -1,  1, k, 1);
  664.  
  665.    /* emit 2 tris for the quad strip */
  666.    tri.v[0] = v[0];
  667.    tri.v[1] = v[1];
  668.    tri.v[2] = v[2];
  669.    stage->next->tri( stage->next, &tri );
  670.  
  671.    tri.v[0] = v[0];
  672.    tri.v[1] = v[2];
  673.    tri.v[2] = v[3];
  674.    stage->next->tri( stage->next, &tri );
  675. }
  676.  
  677.  
  678. static void
  679. aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
  680. {
  681.    auto struct aapoint_stage *aapoint = aapoint_stage(stage);
  682.    struct draw_context *draw = stage->draw;
  683.    struct pipe_context *pipe = draw->pipe;
  684.    const struct pipe_rasterizer_state *rast = draw->rasterizer;
  685.    void *r;
  686.  
  687.    assert(draw->rasterizer->point_smooth);
  688.  
  689.    if (draw->rasterizer->point_size <= 2.0)
  690.       aapoint->radius = 1.0;
  691.    else
  692.       aapoint->radius = 0.5f * draw->rasterizer->point_size;
  693.  
  694.    /*
  695.     * Bind (generate) our fragprog.
  696.     */
  697.    bind_aapoint_fragment_shader(aapoint);
  698.  
  699.    /* update vertex attrib info */
  700.    aapoint->pos_slot = draw_current_shader_position_output(draw);
  701.  
  702.    /* allocate the extra post-transformed vertex attribute */
  703.    aapoint->tex_slot = draw_alloc_extra_vertex_attrib(draw,
  704.                                                       TGSI_SEMANTIC_GENERIC,
  705.                                                       aapoint->fs->generic_attrib);
  706.    assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
  707.  
  708.    /* find psize slot in post-transform vertex */
  709.    aapoint->psize_slot = -1;
  710.    if (draw->rasterizer->point_size_per_vertex) {
  711.       const struct tgsi_shader_info *info = draw_get_shader_info(draw);
  712.       uint i;
  713.       /* find PSIZ vertex output */
  714.       for (i = 0; i < info->num_outputs; i++) {
  715.          if (info->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
  716.             aapoint->psize_slot = i;
  717.             break;
  718.          }
  719.       }
  720.    }
  721.  
  722.    draw->suspend_flushing = TRUE;
  723.  
  724.    /* Disable triangle culling, stippling, unfilled mode etc. */
  725.    r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
  726.    pipe->bind_rasterizer_state(pipe, r);
  727.  
  728.    draw->suspend_flushing = FALSE;
  729.  
  730.    /* now really draw first point */
  731.    stage->point = aapoint_point;
  732.    stage->point(stage, header);
  733. }
  734.  
  735.  
  736. static void
  737. aapoint_flush(struct draw_stage *stage, unsigned flags)
  738. {
  739.    struct draw_context *draw = stage->draw;
  740.    struct aapoint_stage *aapoint = aapoint_stage(stage);
  741.    struct pipe_context *pipe = draw->pipe;
  742.  
  743.    stage->point = aapoint_first_point;
  744.    stage->next->flush( stage->next, flags );
  745.  
  746.    /* restore original frag shader */
  747.    draw->suspend_flushing = TRUE;
  748.    aapoint->driver_bind_fs_state(pipe, aapoint->fs ? aapoint->fs->driver_fs : NULL);
  749.  
  750.    /* restore original rasterizer state */
  751.    if (draw->rast_handle) {
  752.       pipe->bind_rasterizer_state(pipe, draw->rast_handle);
  753.    }
  754.  
  755.    draw->suspend_flushing = FALSE;
  756.  
  757.    draw_remove_extra_vertex_attribs(draw);
  758. }
  759.  
  760.  
  761. static void
  762. aapoint_reset_stipple_counter(struct draw_stage *stage)
  763. {
  764.    stage->next->reset_stipple_counter( stage->next );
  765. }
  766.  
  767.  
  768. static void
  769. aapoint_destroy(struct draw_stage *stage)
  770. {
  771.    struct aapoint_stage* aapoint = aapoint_stage(stage);
  772.    struct pipe_context *pipe = stage->draw->pipe;
  773.  
  774.    draw_free_temp_verts( stage );
  775.  
  776.    /* restore the old entry points */
  777.    pipe->create_fs_state = aapoint->driver_create_fs_state;
  778.    pipe->bind_fs_state = aapoint->driver_bind_fs_state;
  779.    pipe->delete_fs_state = aapoint->driver_delete_fs_state;
  780.  
  781.    FREE( stage );
  782. }
  783.  
  784.  
  785. static struct aapoint_stage *
  786. draw_aapoint_stage(struct draw_context *draw)
  787. {
  788.    struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
  789.    if (aapoint == NULL)
  790.       goto fail;
  791.  
  792.    aapoint->stage.draw = draw;
  793.    aapoint->stage.name = "aapoint";
  794.    aapoint->stage.next = NULL;
  795.    aapoint->stage.point = aapoint_first_point;
  796.    aapoint->stage.line = draw_pipe_passthrough_line;
  797.    aapoint->stage.tri = draw_pipe_passthrough_tri;
  798.    aapoint->stage.flush = aapoint_flush;
  799.    aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
  800.    aapoint->stage.destroy = aapoint_destroy;
  801.  
  802.    if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
  803.       goto fail;
  804.  
  805.    return aapoint;
  806.  
  807.  fail:
  808.    if (aapoint)
  809.       aapoint->stage.destroy(&aapoint->stage);
  810.  
  811.    return NULL;
  812.  
  813. }
  814.  
  815.  
  816. static struct aapoint_stage *
  817. aapoint_stage_from_pipe(struct pipe_context *pipe)
  818. {
  819.    struct draw_context *draw = (struct draw_context *) pipe->draw;
  820.    return aapoint_stage(draw->pipeline.aapoint);
  821. }
  822.  
  823.  
  824. /**
  825.  * This function overrides the driver's create_fs_state() function and
  826.  * will typically be called by the state tracker.
  827.  */
  828. static void *
  829. aapoint_create_fs_state(struct pipe_context *pipe,
  830.                        const struct pipe_shader_state *fs)
  831. {
  832.    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
  833.    struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
  834.    if (aafs == NULL)
  835.       return NULL;
  836.  
  837.    aafs->state.tokens = tgsi_dup_tokens(fs->tokens);
  838.  
  839.    /* pass-through */
  840.    aafs->driver_fs = aapoint->driver_create_fs_state(pipe, fs);
  841.  
  842.    return aafs;
  843. }
  844.  
  845.  
  846. static void
  847. aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
  848. {
  849.    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
  850.    struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
  851.    /* save current */
  852.    aapoint->fs = aafs;
  853.    /* pass-through */
  854.    aapoint->driver_bind_fs_state(pipe,
  855.                                  (aafs ? aafs->driver_fs : NULL));
  856. }
  857.  
  858.  
  859. static void
  860. aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
  861. {
  862.    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
  863.    struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
  864.  
  865.    /* pass-through */
  866.    aapoint->driver_delete_fs_state(pipe, aafs->driver_fs);
  867.  
  868.    if (aafs->aapoint_fs)
  869.       aapoint->driver_delete_fs_state(pipe, aafs->aapoint_fs);
  870.  
  871.    FREE((void*)aafs->state.tokens);
  872.  
  873.    FREE(aafs);
  874. }
  875.  
  876.  
  877. /**
  878.  * Called by drivers that want to install this AA point prim stage
  879.  * into the draw module's pipeline.  This will not be used if the
  880.  * hardware has native support for AA points.
  881.  */
  882. boolean
  883. draw_install_aapoint_stage(struct draw_context *draw,
  884.                            struct pipe_context *pipe)
  885. {
  886.    struct aapoint_stage *aapoint;
  887.  
  888.    pipe->draw = (void *) draw;
  889.  
  890.    /*
  891.     * Create / install AA point drawing / prim stage
  892.     */
  893.    aapoint = draw_aapoint_stage( draw );
  894.    if (aapoint == NULL)
  895.       return FALSE;
  896.  
  897.    /* save original driver functions */
  898.    aapoint->driver_create_fs_state = pipe->create_fs_state;
  899.    aapoint->driver_bind_fs_state = pipe->bind_fs_state;
  900.    aapoint->driver_delete_fs_state = pipe->delete_fs_state;
  901.  
  902.    /* override the driver's functions */
  903.    pipe->create_fs_state = aapoint_create_fs_state;
  904.    pipe->bind_fs_state = aapoint_bind_fs_state;
  905.    pipe->delete_fs_state = aapoint_delete_fs_state;
  906.  
  907.    draw->pipeline.aapoint = &aapoint->stage;
  908.  
  909.    return TRUE;
  910. }
  911.