/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Shader validator for VC4.
 *
 * The VC4 has no IOMMU between it and system memory.  So, a user with access
 * to execute shaders could escalate privilege by overwriting system memory
 * (using the VPM write address register in the general-purpose DMA mode) or
 * reading system memory it shouldn't (reading it as a texture, or uniform
 * data, or vertex data).
 *
 * This walks over a shader starting from some offset within a BO, ensuring
 * that its accesses are appropriately bounded, and recording how many texture
 * accesses are made and where so that we can do relocations for them in the
 * uniform stream.
 *
 * The kernel API has shaders stored in user-mapped BOs.  The BOs will be
 * forcibly unmapped from the process before validation, and any cache of
 * validated state will be flushed if the mapping is faulted back in.
 *
 * Storing the shaders in BOs means that the validation process will be slow
 * due to uncached reads, but since shaders are long-lived and shader BOs are
 * never actually modified, this shouldn't be a problem.
 */
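
/* A minimal usage sketch (hypothetical caller, shown for illustration
 * only; actual in-tree callers may differ):
 *
 *      struct vc4_validated_shader_info *info;
 *
 *      info = vc4_validate_shader(shader_bo);
 *      if (!info)
 *              return -EINVAL;
 *
 * On success, info->texture_samples[] drives the uniform-stream
 * relocations and info->uniforms_src_size sizes the user-supplied
 * uniform stream; NULL means the shader was rejected (or allocation
 * failed).
 */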

#include "vc4_drv.h"
#include "vc4_qpu.h"
#include "vc4_qpu_defines.h"

struct vc4_shader_validation_state {
        struct vc4_texture_sample_info tmu_setup[2];
        int tmu_write_count[2];

        /* For registers that were last written to by a MIN instruction with
         * one argument being a uniform, the address of the uniform.
         * Otherwise, ~0.
         *
         * This is used for the validation of direct address memory reads.
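         *
         * (Index layout, per waddr_to_live_reg_index() below: regfile A
         * occupies [0, 31], regfile B [32, 63], and accumulators r0-r3
         * [64, 67].)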
         */
        uint32_t live_clamp_offsets[32 + 32 + 4];
};

static uint32_t
waddr_to_live_reg_index(uint32_t waddr, bool is_b)
{
        if (waddr < 32) {
                if (is_b)
                        return 32 + waddr;
                else
                        return waddr;
        } else if (waddr <= QPU_W_ACC3) {
                return 64 + waddr - QPU_W_ACC0;
        } else {
                return ~0;
        }
}

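/* Writing the TMU's S coordinate register is what submits the texture
 * lookup on this hardware, so an S write terminates a texture setup
 * sequence.
 */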
static bool
is_tmu_submit(uint32_t waddr)
{
        return (waddr == QPU_W_TMU0_S ||
                waddr == QPU_W_TMU1_S);
}

static bool
is_tmu_write(uint32_t waddr)
{
        return (waddr >= QPU_W_TMU0_S &&
                waddr <= QPU_W_TMU1_B);
}

static bool
record_validated_texture_sample(struct vc4_validated_shader_info *validated_shader,
                                struct vc4_shader_validation_state *validation_state,
                                int tmu)
{
        uint32_t s = validated_shader->num_texture_samples;
        int i;
        struct vc4_texture_sample_info *temp_samples;

        temp_samples = krealloc(validated_shader->texture_samples,
                                (s + 1) * sizeof(*temp_samples),
                                GFP_KERNEL);
        if (!temp_samples)
                return false;

        memcpy(&temp_samples[s],
               &validation_state->tmu_setup[tmu],
               sizeof(*temp_samples));

        validated_shader->num_texture_samples = s + 1;
        validated_shader->texture_samples = temp_samples;

        for (i = 0; i < 4; i++)
                validation_state->tmu_setup[tmu].p_offset[i] = ~0;

        return true;
}

static bool
check_tmu_write(uint64_t inst,
                struct vc4_validated_shader_info *validated_shader,
                struct vc4_shader_validation_state *validation_state,
                bool is_mul)
{
        uint32_t waddr = (is_mul ?
                          QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
                          QPU_GET_FIELD(inst, QPU_WADDR_ADD));
        uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
        uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
        int tmu = waddr > QPU_W_TMU0_B;
        bool submit = is_tmu_submit(waddr);
        bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
        uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

        if (is_direct) {
                uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
                uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
                uint32_t clamp_offset = ~0;

                if (sig == QPU_SIG_SMALL_IMM) {
                        DRM_ERROR("direct TMU read used small immediate\n");
                        return false;
                }

                /* Make sure that this texture load is an add of the base
                 * address of the UBO to a clamped offset within the UBO.
                 */
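                /* In rough pseudo-QPU assembly (illustrative syntax, not
                 * exact), the accepted pattern looks like:
                 *
                 *      min  rX, rY, unif    ; clamp tracked by track_live_clamps()
                 *      add  t0s, rX, unif   ; clamped offset plus UBO base
                 */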
                if (is_mul ||
                    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
                        DRM_ERROR("direct TMU load wasn't an add\n");
                        return false;
                }

                /* We assert that the clamped address is the first
                 * argument, and the UBO base address is the second argument.
                 * This is arbitrary, but simpler than supporting flipping the
                 * two either way.
                 */
                if (add_a == QPU_MUX_A) {
                        clamp_offset = validation_state->live_clamp_offsets[raddr_a];
                } else if (add_a == QPU_MUX_B) {
                        clamp_offset = validation_state->live_clamp_offsets[32 + raddr_b];
                } else if (add_a <= QPU_MUX_R4) {
                        clamp_offset = validation_state->live_clamp_offsets[64 + add_a];
                }

                if (clamp_offset == ~0) {
                        DRM_ERROR("direct TMU load wasn't clamped\n");
                        return false;
                }

                /* Store the clamp value's offset in p1 (see reloc_tex() in
                 * vc4_validate.c).
                 */
                validation_state->tmu_setup[tmu].p_offset[1] =
                        clamp_offset;

                if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
                    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
                        DRM_ERROR("direct TMU load didn't add to a uniform\n");
                        return false;
                }

                validation_state->tmu_setup[tmu].is_direct = true;
        } else {
                if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
                                              raddr_b == QPU_R_UNIF)) {
                        DRM_ERROR("uniform read in the same instruction as "
                                  "texture setup.\n");
                        return false;
                }
        }

        if (validation_state->tmu_write_count[tmu] >= 4) {
                DRM_ERROR("TMU%d got too many parameters before dispatch\n",
                          tmu);
                return false;
        }
        validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
                validated_shader->uniforms_size;
        validation_state->tmu_write_count[tmu]++;
        /* Since direct uses a RADDR uniform reference, it will get counted in
         * check_instruction_reads().
         */
        if (!is_direct)
                validated_shader->uniforms_size += 4;

        if (submit) {
                if (!record_validated_texture_sample(validated_shader,
                                                     validation_state, tmu)) {
                        return false;
                }

                validation_state->tmu_write_count[tmu] = 0;
        }

        return true;
}

static bool
check_register_write(uint64_t inst,
                     struct vc4_validated_shader_info *validated_shader,
                     struct vc4_shader_validation_state *validation_state,
                     bool is_mul)
{
        uint32_t waddr = (is_mul ?
                          QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
                          QPU_GET_FIELD(inst, QPU_WADDR_ADD));
        bool is_b = is_mul != ((inst & QPU_WS) != 0);
        uint32_t live_reg_index;

        switch (waddr) {
        case QPU_W_UNIFORMS_ADDRESS:
                /* XXX: We'll probably need to support this for reladdr, but
                 * it's definitely a security-related one.
                 */
                DRM_ERROR("uniforms address load unsupported\n");
                return false;

        case QPU_W_TLB_COLOR_MS:
        case QPU_W_TLB_COLOR_ALL:
        case QPU_W_TLB_Z:
                /* These only interact with the tile buffer, not main memory,
                 * so they're safe.
                 */
                return true;

        case QPU_W_TMU0_S:
        case QPU_W_TMU0_T:
        case QPU_W_TMU0_R:
        case QPU_W_TMU0_B:
        case QPU_W_TMU1_S:
        case QPU_W_TMU1_T:
        case QPU_W_TMU1_R:
        case QPU_W_TMU1_B:
                return check_tmu_write(inst, validated_shader, validation_state,
                                       is_mul);

        case QPU_W_HOST_INT:
        case QPU_W_TMU_NOSWAP:
        case QPU_W_TLB_ALPHA_MASK:
        case QPU_W_MUTEX_RELEASE:
                /* XXX: I haven't thought about these, so don't support them
                 * for now.
                 */
                DRM_ERROR("Unsupported waddr %d\n", waddr);
                return false;

        case QPU_W_VPM_ADDR:
                DRM_ERROR("General VPM DMA unsupported\n");
                return false;

        case QPU_W_VPM:
        case QPU_W_VPMVCD_SETUP:
                /* We allow VPM setup in general, even including VPM DMA
                 * configuration setup, because the (unsafe) DMA can only be
                 * triggered by QPU_W_VPM_ADDR writes.
                 */
                return true;

        case QPU_W_TLB_STENCIL_SETUP:
                return true;
        }

        /* Clear out the live offset clamp tracking for the written register.
         * If this particular instruction is setting up an offset clamp, it'll
         * get tracked immediately after we return.
         */
        live_reg_index = waddr_to_live_reg_index(waddr, is_b);
        if (live_reg_index != ~0)
                validation_state->live_clamp_offsets[live_reg_index] = ~0;

        return true;
}

static void
track_live_clamps(uint64_t inst,
                  struct vc4_validated_shader_info *validated_shader,
                  struct vc4_shader_validation_state *validation_state)
{
        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
        uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
        uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
        uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
        uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
        bool is_b = inst & QPU_WS;
        uint32_t live_reg_index;

        if (QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_MIN)
                return;

        if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
            !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
              sig != QPU_SIG_SMALL_IMM)) {
                return;
        }

        live_reg_index = waddr_to_live_reg_index(waddr_add, is_b);
        if (live_reg_index != ~0) {
                validation_state->live_clamp_offsets[live_reg_index] =
                        validated_shader->uniforms_size;
        }
}

static bool
check_instruction_writes(uint64_t inst,
                         struct vc4_validated_shader_info *validated_shader,
                         struct vc4_shader_validation_state *validation_state)
{
        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
        uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
        bool ok;

        if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
                DRM_ERROR("ADD and MUL both set up textures\n");
                return false;
        }

        ok = (check_register_write(inst, validated_shader, validation_state, false) &&
              check_register_write(inst, validated_shader, validation_state, true));

        track_live_clamps(inst, validated_shader, validation_state);

        return ok;
}

static bool
check_instruction_reads(uint64_t inst,
                        struct vc4_validated_shader_info *validated_shader)
{
        uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
        uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
        uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

        if (raddr_a == QPU_R_UNIF ||
            (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
                /* This can't overflow the uint32_t: every 4 bytes we add
                 * here corresponds to an 8-byte instruction we've already
                 * read, so we'd run out of memory first.
                 */
                validated_shader->uniforms_size += 4;
        }

        return true;
}

struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
        bool found_shader_end = false;
        int shader_end_ip = 0;
        uint32_t ip, max_ip;
        uint64_t *shader;
        struct vc4_validated_shader_info *validated_shader;
        struct vc4_shader_validation_state validation_state;
        int i;

        memset(&validation_state, 0, sizeof(validation_state));

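        /* Mark every TMU parameter slot and every live clamp-offset entry
         * as "not yet written" (~0).
         */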
        for (i = 0; i < 8; i++)
                validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
        for (i = 0; i < ARRAY_SIZE(validation_state.live_clamp_offsets); i++)
                validation_state.live_clamp_offsets[i] = ~0;

        shader = shader_obj->vaddr;
        max_ip = shader_obj->base.size / sizeof(uint64_t);

        validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
        if (!validated_shader)
                return NULL;

        for (ip = 0; ip < max_ip; ip++) {
                uint64_t inst = shader[ip];
                uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

                switch (sig) {
                case QPU_SIG_NONE:
                case QPU_SIG_WAIT_FOR_SCOREBOARD:
                case QPU_SIG_SCOREBOARD_UNLOCK:
                case QPU_SIG_COLOR_LOAD:
                case QPU_SIG_LOAD_TMU0:
                case QPU_SIG_LOAD_TMU1:
                case QPU_SIG_PROG_END:
                case QPU_SIG_SMALL_IMM:
                        if (!check_instruction_writes(inst, validated_shader,
                                                      &validation_state)) {
                                DRM_ERROR("Bad write at ip %d\n", ip);
                                goto fail;
                        }

                        if (!check_instruction_reads(inst, validated_shader))
                                goto fail;

                        if (sig == QPU_SIG_PROG_END) {
                                found_shader_end = true;
                                shader_end_ip = ip;
                        }

                        break;

                case QPU_SIG_LOAD_IMM:
                        if (!check_instruction_writes(inst, validated_shader,
                                                      &validation_state)) {
                                DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
                                goto fail;
                        }
                        break;

                default:
                        DRM_ERROR("Unsupported QPU signal %d at "
                                  "instruction %d\n", sig, ip);
                        goto fail;
                }

                /* There are two delay slots after program end is signaled
                 * that are still executed, then we're finished.
                 */
                if (found_shader_end && ip == shader_end_ip + 2)
                        break;
        }

        if (ip == max_ip) {
                DRM_ERROR("shader failed to terminate before "
                          "shader BO end at %zd\n",
                          shader_obj->base.size);
                goto fail;
        }

        /* Again, no chance of integer overflow here because the worst case
         * scenario is 8 bytes of uniforms plus handles per 8-byte
         * instruction.
         */
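        /* The source stream is bigger than what the shader itself reads:
         * going by this expression and the relocation code in
         * vc4_validate.c, each recorded texture sample accounts for an
         * extra 4 bytes of user-supplied uniform data.
         */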
        validated_shader->uniforms_src_size =
                (validated_shader->uniforms_size +
                 4 * validated_shader->num_texture_samples);

        return validated_shader;

fail:
        kfree(validated_shader);
        return NULL;
}