Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2010 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16.  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  17.  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  18.  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  19.  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20.  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21.  *
  22.  * The above copyright notice and this permission notice (including the
  23.  * next paragraph) shall be included in all copies or substantial portions
  24.  * of the Software.
  25.  *
  26.  **************************************************************************/
  27.  
  28.  
  29. /**
  30.  * @file
  31.  * YUV pixel format manipulation.
  32.  *
  33.  * @author Jose Fonseca <jfonseca@vmware.com>
  34.  */
  35.  
  36.  
  37. #include "util/u_format.h"
  38. #include "util/u_cpu_detect.h"
  39.  
  40. #include "lp_bld_arit.h"
  41. #include "lp_bld_type.h"
  42. #include "lp_bld_const.h"
  43. #include "lp_bld_conv.h"
  44. #include "lp_bld_gather.h"
  45. #include "lp_bld_format.h"
  46. #include "lp_bld_init.h"
  47. #include "lp_bld_logic.h"
  48.  
  49. /**
  50.  * Extract Y, U, V channels from packed UYVY.
  51.  * @param packed  is a <n x i32> vector with the packed UYVY blocks
  52.  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
  53.  */
  54. static void
  55. uyvy_to_yuv_soa(struct gallivm_state *gallivm,
  56.                 unsigned n,
  57.                 LLVMValueRef packed,
  58.                 LLVMValueRef i,
  59.                 LLVMValueRef *y,
  60.                 LLVMValueRef *u,
  61.                 LLVMValueRef *v)
  62. {
  63.    LLVMBuilderRef builder = gallivm->builder;
  64.    struct lp_type type;
  65.    LLVMValueRef mask;
  66.  
  67.    memset(&type, 0, sizeof type);
  68.    type.width = 32;
  69.    type.length = n;
  70.  
  71.    assert(lp_check_value(type, packed));
  72.    assert(lp_check_value(type, i));
  73.  
  74.    /*
  75.     * Little endian:
  76.     * y = (uyvy >> (16*i + 8)) & 0xff
  77.     * u = (uyvy        ) & 0xff
  78.     * v = (uyvy >> 16  ) & 0xff
  79.     *
  80.     * Big endian:
  81.     * y = (uyvy >> (-16*i + 16)) & 0xff
  82.     * u = (uyvy >> 24) & 0xff
  83.     * v = (uyvy >>  8) & 0xff
  84.     */
  85.  
  86. #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  87.    /*
  88.     * Avoid shift with per-element count.
  89.     * No support on x86, gets translated to roughly 5 instructions
  90.     * per element. Didn't measure performance but cuts shader size
  91.     * by quite a bit (less difference if cpu has no sse4.1 support).
  92.     */
  93.    if (util_cpu_caps.has_sse2 && n > 1) {
  94.       LLVMValueRef sel, tmp, tmp2;
  95.       struct lp_build_context bld32;
  96.  
  97.       lp_build_context_init(&bld32, gallivm, type);
  98.  
  99.       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
  100.       tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
  101.       sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
  102.       *y = lp_build_select(&bld32, sel, tmp, tmp2);
  103.    } else
  104. #endif
  105.    {
  106.       LLVMValueRef shift;
  107. #ifdef PIPE_ARCH_LITTLE_ENDIAN
  108.       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
  109.       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
  110. #else
  111.       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
  112.       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 16), "");
  113. #endif
  114.       *y = LLVMBuildLShr(builder, packed, shift, "");
  115.    }
  116.  
  117. #ifdef PIPE_ARCH_LITTLE_ENDIAN
  118.    *u = packed;
  119.    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
  120. #else
  121.    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
  122.    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
  123. #endif
  124.  
  125.    mask = lp_build_const_int_vec(gallivm, type, 0xff);
  126.  
  127.    *y = LLVMBuildAnd(builder, *y, mask, "y");
  128.    *u = LLVMBuildAnd(builder, *u, mask, "u");
  129.    *v = LLVMBuildAnd(builder, *v, mask, "v");
  130. }
  131.  
  132.  
  133. /**
  134.  * Extract Y, U, V channels from packed YUYV.
  135.  * @param packed  is a <n x i32> vector with the packed YUYV blocks
  136.  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
  137.  */
  138. static void
  139. yuyv_to_yuv_soa(struct gallivm_state *gallivm,
  140.                 unsigned n,
  141.                 LLVMValueRef packed,
  142.                 LLVMValueRef i,
  143.                 LLVMValueRef *y,
  144.                 LLVMValueRef *u,
  145.                 LLVMValueRef *v)
  146. {
  147.    LLVMBuilderRef builder = gallivm->builder;
  148.    struct lp_type type;
  149.    LLVMValueRef mask;
  150.  
  151.    memset(&type, 0, sizeof type);
  152.    type.width = 32;
  153.    type.length = n;
  154.  
  155.    assert(lp_check_value(type, packed));
  156.    assert(lp_check_value(type, i));
  157.  
  158.    /*
  159.    * Little endian:
  160.     * y = (yuyv >> 16*i) & 0xff
  161.     * u = (yuyv >> 8   ) & 0xff
  162.     * v = (yuyv >> 24  ) & 0xff
  163.     *
  164.     * Big endian:
  165.     * y = (yuyv >> (-16*i + 24) & 0xff
  166.     * u = (yuyv >> 16)          & 0xff
  167.     * v = (yuyv)                & 0xff
  168.     */
  169.  
  170. #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  171.    /*
  172.     * Avoid shift with per-element count.
  173.     * No support on x86, gets translated to roughly 5 instructions
  174.     * per element. Didn't measure performance but cuts shader size
  175.     * by quite a bit (less difference if cpu has no sse4.1 support).
  176.     */
  177.    if (util_cpu_caps.has_sse2 && n > 1) {
  178.       LLVMValueRef sel, tmp;
  179.       struct lp_build_context bld32;
  180.  
  181.       lp_build_context_init(&bld32, gallivm, type);
  182.  
  183.       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
  184.       sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
  185.        *y = lp_build_select(&bld32, sel, packed, tmp);
  186.    } else
  187. #endif
  188.    {
  189.       LLVMValueRef shift;
  190. #ifdef PIPE_ARCH_LITTLE_ENDIAN
  191.       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
  192. #else
  193.       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
  194.       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 24), "");
  195. #endif
  196.       *y = LLVMBuildLShr(builder, packed, shift, "");
  197.    }
  198.  
  199. #ifdef PIPE_ARCH_LITTLE_ENDIAN
  200.    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
  201.    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
  202. #else
  203.    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
  204.    *v = packed;
  205. #endif
  206.  
  207.    mask = lp_build_const_int_vec(gallivm, type, 0xff);
  208.  
  209.    *y = LLVMBuildAnd(builder, *y, mask, "y");
  210.    *u = LLVMBuildAnd(builder, *u, mask, "u");
  211.    *v = LLVMBuildAnd(builder, *v, mask, "v");
  212. }
  213.  
  214.  
  215. static INLINE void
  216. yuv_to_rgb_soa(struct gallivm_state *gallivm,
  217.                unsigned n,
  218.                LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
  219.                LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
  220. {
  221.    LLVMBuilderRef builder = gallivm->builder;
  222.    struct lp_type type;
  223.    struct lp_build_context bld;
  224.  
  225.    LLVMValueRef c0;
  226.    LLVMValueRef c8;
  227.    LLVMValueRef c16;
  228.    LLVMValueRef c128;
  229.    LLVMValueRef c255;
  230.  
  231.    LLVMValueRef cy;
  232.    LLVMValueRef cug;
  233.    LLVMValueRef cub;
  234.    LLVMValueRef cvr;
  235.    LLVMValueRef cvg;
  236.  
  237.    memset(&type, 0, sizeof type);
  238.    type.sign = TRUE;
  239.    type.width = 32;
  240.    type.length = n;
  241.  
  242.    lp_build_context_init(&bld, gallivm, type);
  243.  
  244.    assert(lp_check_value(type, y));
  245.    assert(lp_check_value(type, u));
  246.    assert(lp_check_value(type, v));
  247.  
  248.    /*
  249.     * Constants
  250.     */
  251.  
  252.    c0   = lp_build_const_int_vec(gallivm, type,   0);
  253.    c8   = lp_build_const_int_vec(gallivm, type,   8);
  254.    c16  = lp_build_const_int_vec(gallivm, type,  16);
  255.    c128 = lp_build_const_int_vec(gallivm, type, 128);
  256.    c255 = lp_build_const_int_vec(gallivm, type, 255);
  257.  
  258.    cy  = lp_build_const_int_vec(gallivm, type,  298);
  259.    cug = lp_build_const_int_vec(gallivm, type, -100);
  260.    cub = lp_build_const_int_vec(gallivm, type,  516);
  261.    cvr = lp_build_const_int_vec(gallivm, type,  409);
  262.    cvg = lp_build_const_int_vec(gallivm, type, -208);
  263.  
  264.    /*
  265.     *  y -= 16;
  266.     *  u -= 128;
  267.     *  v -= 128;
  268.     */
  269.  
  270.    y = LLVMBuildSub(builder, y, c16, "");
  271.    u = LLVMBuildSub(builder, u, c128, "");
  272.    v = LLVMBuildSub(builder, v, c128, "");
  273.  
  274.    /*
  275.     * r = 298 * _y            + 409 * _v + 128;
  276.     * g = 298 * _y - 100 * _u - 208 * _v + 128;
  277.     * b = 298 * _y + 516 * _u            + 128;
  278.     */
  279.  
  280.    y = LLVMBuildMul(builder, y, cy, "");
  281.    y = LLVMBuildAdd(builder, y, c128, "");
  282.  
  283.    *r = LLVMBuildMul(builder, v, cvr, "");
  284.    *g = LLVMBuildAdd(builder,
  285.                      LLVMBuildMul(builder, u, cug, ""),
  286.                      LLVMBuildMul(builder, v, cvg, ""),
  287.                      "");
  288.    *b = LLVMBuildMul(builder, u, cub, "");
  289.  
  290.    *r = LLVMBuildAdd(builder, *r, y, "");
  291.    *g = LLVMBuildAdd(builder, *g, y, "");
  292.    *b = LLVMBuildAdd(builder, *b, y, "");
  293.  
  294.    /*
  295.     * r >>= 8;
  296.     * g >>= 8;
  297.     * b >>= 8;
  298.     */
  299.  
  300.    *r = LLVMBuildAShr(builder, *r, c8, "r");
  301.    *g = LLVMBuildAShr(builder, *g, c8, "g");
  302.    *b = LLVMBuildAShr(builder, *b, c8, "b");
  303.  
  304.    /*
  305.     * Clamp
  306.     */
  307.  
  308.    *r = lp_build_clamp(&bld, *r, c0, c255);
  309.    *g = lp_build_clamp(&bld, *g, c0, c255);
  310.    *b = lp_build_clamp(&bld, *b, c0, c255);
  311. }
  312.  
  313.  
  314. static LLVMValueRef
  315. rgb_to_rgba_aos(struct gallivm_state *gallivm,
  316.                 unsigned n,
  317.                 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
  318. {
  319.    LLVMBuilderRef builder = gallivm->builder;
  320.    struct lp_type type;
  321.    LLVMValueRef a;
  322.    LLVMValueRef rgba;
  323.  
  324.    memset(&type, 0, sizeof type);
  325.    type.sign = TRUE;
  326.    type.width = 32;
  327.    type.length = n;
  328.  
  329.    assert(lp_check_value(type, r));
  330.    assert(lp_check_value(type, g));
  331.    assert(lp_check_value(type, b));
  332.  
  333.    /*
  334.     * Make a 4 x unorm8 vector
  335.     */
  336.  
  337. #ifdef PIPE_ARCH_LITTLE_ENDIAN
  338.    r = r;
  339.    g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
  340.    b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
  341.    a = lp_build_const_int_vec(gallivm, type, 0xff000000);
  342. #else
  343.    r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), "");
  344.    g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), "");
  345.    b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), "");
  346.    a = lp_build_const_int_vec(gallivm, type, 0x000000ff);
  347. #endif
  348.  
  349.    rgba = r;
  350.    rgba = LLVMBuildOr(builder, rgba, g, "");
  351.    rgba = LLVMBuildOr(builder, rgba, b, "");
  352.    rgba = LLVMBuildOr(builder, rgba, a, "");
  353.  
  354.    rgba = LLVMBuildBitCast(builder, rgba,
  355.                            LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
  356.  
  357.    return rgba;
  358. }
  359.  
  360.  
  361. /**
  362.  * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
  363.  */
  364. static LLVMValueRef
  365. uyvy_to_rgba_aos(struct gallivm_state *gallivm,
  366.                  unsigned n,
  367.                  LLVMValueRef packed,
  368.                  LLVMValueRef i)
  369. {
  370.    LLVMValueRef y, u, v;
  371.    LLVMValueRef r, g, b;
  372.    LLVMValueRef rgba;
  373.  
  374.    uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
  375.    yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
  376.    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
  377.  
  378.    return rgba;
  379. }
  380.  
  381.  
  382. /**
  383.  * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
  384.  */
  385. static LLVMValueRef
  386. yuyv_to_rgba_aos(struct gallivm_state *gallivm,
  387.                  unsigned n,
  388.                  LLVMValueRef packed,
  389.                  LLVMValueRef i)
  390. {
  391.    LLVMValueRef y, u, v;
  392.    LLVMValueRef r, g, b;
  393.    LLVMValueRef rgba;
  394.  
  395.    yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
  396.    yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
  397.    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
  398.  
  399.    return rgba;
  400. }
  401.  
  402.  
  403. /**
  404.  * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
  405.  */
  406. static LLVMValueRef
  407. rgbg_to_rgba_aos(struct gallivm_state *gallivm,
  408.                  unsigned n,
  409.                  LLVMValueRef packed,
  410.                  LLVMValueRef i)
  411. {
  412.    LLVMValueRef r, g, b;
  413.    LLVMValueRef rgba;
  414.  
  415.    uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
  416.    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
  417.  
  418.    return rgba;
  419. }
  420.  
  421.  
  422. /**
  423.  * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
  424.  */
  425. static LLVMValueRef
  426. grgb_to_rgba_aos(struct gallivm_state *gallivm,
  427.                  unsigned n,
  428.                  LLVMValueRef packed,
  429.                  LLVMValueRef i)
  430. {
  431.    LLVMValueRef r, g, b;
  432.    LLVMValueRef rgba;
  433.  
  434.    yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
  435.    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
  436.  
  437.    return rgba;
  438. }
  439.  
  440. /**
  441.  * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
  442.  */
  443. static LLVMValueRef
  444. grbr_to_rgba_aos(struct gallivm_state *gallivm,
  445.                  unsigned n,
  446.                  LLVMValueRef packed,
  447.                  LLVMValueRef i)
  448. {
  449.    LLVMValueRef r, g, b;
  450.    LLVMValueRef rgba;
  451.  
  452.    uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
  453.    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
  454.  
  455.    return rgba;
  456. }
  457.  
  458.  
  459. /**
  460.  * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
  461.  */
  462. static LLVMValueRef
  463. rgrb_to_rgba_aos(struct gallivm_state *gallivm,
  464.                  unsigned n,
  465.                  LLVMValueRef packed,
  466.                  LLVMValueRef i)
  467. {
  468.    LLVMValueRef r, g, b;
  469.    LLVMValueRef rgba;
  470.  
  471.    yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
  472.    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
  473.  
  474.    return rgba;
  475. }
  476.  
  477. /**
  478.  * @param n  is the number of pixels processed
  479.  * @param packed  is a <n x i32> vector with the packed YUYV blocks
  480.  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
  481.  * @return  a <4*n x i8> vector with the pixel RGBA values in AoS
  482.  */
  483. LLVMValueRef
  484. lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
  485.                                    const struct util_format_description *format_desc,
  486.                                    unsigned n,
  487.                                    LLVMValueRef base_ptr,
  488.                                    LLVMValueRef offset,
  489.                                    LLVMValueRef i,
  490.                                    LLVMValueRef j)
  491. {
  492.    LLVMValueRef packed;
  493.    LLVMValueRef rgba;
  494.  
  495.    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
  496.    assert(format_desc->block.bits == 32);
  497.    assert(format_desc->block.width == 2);
  498.    assert(format_desc->block.height == 1);
  499.  
  500.    packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset, FALSE);
  501.  
  502.    (void)j;
  503.  
  504.    switch (format_desc->format) {
  505.    case PIPE_FORMAT_UYVY:
  506.       rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
  507.       break;
  508.    case PIPE_FORMAT_YUYV:
  509.       rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
  510.       break;
  511.    case PIPE_FORMAT_R8G8_B8G8_UNORM:
  512.       rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
  513.       break;
  514.    case PIPE_FORMAT_G8R8_G8B8_UNORM:
  515.       rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
  516.       break;
  517.    case PIPE_FORMAT_G8R8_B8R8_UNORM:
  518.       rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
  519.       break;
  520.    case PIPE_FORMAT_R8G8_R8B8_UNORM:
  521.       rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
  522.       break;
  523.    default:
  524.       assert(0);
  525.       rgba =  LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
  526.       break;
  527.    }
  528.  
  529.    return rgba;
  530. }
  531.  
  532.