Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright (C) 2014 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21.  * DEALINGS IN THE SOFTWARE.
  22.  */
  23.  
  24. /**
  25.  * \file texcompress_bptc.c
  26.  * GL_ARB_texture_compression_bptc support.
  27.  */
  28.  
  29. #include <stdbool.h>
  30. #include "texcompress.h"
  31. #include "texcompress_bptc.h"
  32. #include "util/format_srgb.h"
  33. #include "texstore.h"
  34. #include "macros.h"
  35. #include "image.h"
  36.  
/* Edge length of a block in texels; a block holds BLOCK_SIZE * BLOCK_SIZE
 * texels.
 */
#define BLOCK_SIZE 4
/* Number of entries in each partition table and in each anchor index row. */
#define N_PARTITIONS 64
/* NOTE(review): presumably the size of one compressed block in bytes (the
 * texel fetch code strides blocks by a literal 16) - confirm against users
 * of this macro.
 */
#define BLOCK_BYTES 16
  40.  
/**
 * Bit layout description of one UNORM block mode.
 *
 * The field order matters: the bptc_unorm_modes table initialises these
 * structures positionally.
 */
struct bptc_unorm_mode {
   /* Number of subsets (1, 2 or 3); each subset has two endpoints. */
   int n_subsets;
   /* Width of the partition number field (0 when there is none). */
   int n_partition_bits;
   /* True if a 2-bit rotation field is present. */
   bool has_rotation_bits;
   /* True if a 1-bit index selection field is present. */
   bool has_index_selection_bit;
   /* Stored bits per colour component of each endpoint (without p-bit). */
   int n_color_bits;
   /* Stored bits per alpha endpoint; 0 means alpha is forced to 255. */
   int n_alpha_bits;
   /* True if every endpoint carries its own p-bit. */
   bool has_endpoint_pbits;
   /* True if the two endpoints of a subset share one p-bit. */
   bool has_shared_pbits;
   /* Bits per texel in the primary index set. */
   int n_index_bits;
   /* Bits per texel in the secondary index set; 0 when there is none. */
   int n_secondary_index_bits;
};
  53.  
/**
 * One contiguous run of bits in a float block and where it lands in the
 * endpoint values.
 *
 * The field order matters: the bptc_float_modes table initialises these
 * structures positionally.
 */
struct bptc_float_bitfield {
   /* Index of the destination endpoint; -1 terminates a bitfield list. */
   int8_t endpoint;
   /* Index of the destination component within the endpoint. */
   uint8_t component;
   /* Bit position within the destination value where the run is stored. */
   uint8_t offset;
   /* Number of bits in the run. */
   uint8_t n_bits;
   /* True if the run is stored in the block in reversed bit order. */
   bool reverse;
};
  61.  
/**
 * Description of one float block mode.
 *
 * The field order matters: the bptc_float_modes table initialises these
 * structures positionally.
 */
struct bptc_float_mode {
   /* True for table slots marked as reserved; the other fields are then
    * left zero-initialised.
    */
   bool reserved;
   /* True if endpoints after the first are stored as signed offsets from
    * endpoint 0.
    */
   bool transformed_endpoints;
   /* Width of the partition field; non-zero means the block has four
    * endpoints instead of two.
    */
   int n_partition_bits;
   /* Width of an endpoint component before unquantization. */
   int n_endpoint_bits;
   /* NOTE(review): presumably the number of index bits per texel - the code
    * that reads this field is outside this view.
    */
   int n_index_bits;
   /* Width of the signed offsets for each of the three components, used
    * when transformed_endpoints is set.
    */
   int n_delta_bits[3];
   /* Bit runs making up the endpoints, in block order, terminated by an
    * entry whose endpoint is -1.
    */
   struct bptc_float_bitfield bitfields[24];
};
  71.  
/* NOTE(review): no user of this structure is visible in this part of the
 * file; the field descriptions below are assumptions based on the names
 * only - confirm against the code that uses it.
 */
struct bit_writer {
   /* Presumably the byte currently being assembled. */
   uint8_t buf;
   /* Presumably the number of bits already placed in buf. */
   int pos;
   /* Presumably the location where completed bytes are written. */
   uint8_t *dst;
};
  77.  
/* Layout of each UNORM mode, indexed by mode number. The mode number of a
 * block is the position of the lowest set bit of its first byte.
 *
 * Columns, in struct bptc_unorm_mode order: n_subsets, n_partition_bits,
 * has_rotation_bits, has_index_selection_bit, n_color_bits, n_alpha_bits,
 * has_endpoint_pbits, has_shared_pbits, n_index_bits,
 * n_secondary_index_bits.
 */
static const struct bptc_unorm_mode
bptc_unorm_modes[] = {
   /* 0 */ { 3, 4, false, false, 4, 0, true,  false, 3, 0 },
   /* 1 */ { 2, 6, false, false, 6, 0, false, true,  3, 0 },
   /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
   /* 3 */ { 2, 6, false, false, 7, 0, true,  false, 2, 0 },
   /* 4 */ { 1, 0, true,  true,  5, 6, false, false, 2, 3 },
   /* 5 */ { 1, 0, true,  false, 7, 8, false, false, 2, 2 },
   /* 6 */ { 1, 0, false, false, 7, 7, true,  false, 4, 0 },
   /* 7 */ { 2, 6, false, false, 5, 5, true,  false, 2, 0 }
};
  89.  
/* Layout of each float mode. The comment above each entry is the bit
 * pattern labelling that mode.
 * NOTE(review): the code that maps a block's mode bits to an index into
 * this table is outside this view - confirm the ordering there.
 *
 * Each entry is, in struct bptc_float_mode order: reserved,
 * transformed_endpoints, n_partition_bits, n_endpoint_bits, n_index_bits,
 * { n_delta_bits for the three components }, then the list of bitfields.
 * Each bitfield is { endpoint, component, offset, n_bits, reverse } and the
 * list is terminated by { -1 }. Reserved entries set only the first field.
 */
static const struct bptc_float_mode
bptc_float_modes[] = {
   /* 00 */
   { false, true, 5, 10, 3, { 5, 5, 5 },
     { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
       { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01 */
   { false, true, 5, 7, 3, { 6, 6, 6 },
     { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
       { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
       { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
       { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 00010 */
   { false, true, 5, 11, 3, { 5, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00011 */
   { false, false, 0, 10, 4, { 10, 10, 10 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
       { -1 } }
   },
   /* 00110 */
   { false, true, 5, 11, 3, { 4, 5, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00111 */
   { false, true, 0, 11, 4, { 9, 9, 9 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
       { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
       { -1 } }
   },
   /* 01010 */
   { false, true, 5, 11, 3, { 4, 4, 5 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01011 */
   { false, true, 0, 12, 4, { 8, 8, 8 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
       { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
       { -1 } }
   },
   /* 01110 */
   { false, true, 5, 9, 3, { 5, 5, 5 },
     { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01111 */
   { false, true, 0, 16, 4, { 4, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
       { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
       { -1 } }
   },
   /* 10010 */
   { false, true, 5, 8, 3, { 6, 5, 5 },
     { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 10011 */
   { true /* reserved */ },
   /* 10110 */
   { false, true, 5, 8, 3, { 5, 6, 5 },
     { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 10111 */
   { true /* reserved */ },
   /* 11010 */
   { false, true, 5, 8, 3, { 5, 5, 6 },
     { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 11011 */
   { true /* reserved */ },
   /* 11110 */
   { false, false, 5, 6, 3, { 6, 6, 6 },
     { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
       { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 11111 */
   { true /* reserved */ },
};
  240.  
/* This partition table is used when the mode has two subsets. It is indexed
 * by the partition number read from the block. Each partition is
 * represented by a 32-bit value which gives 2 bits per texel within the
 * block, with texel 0 in the two least-significant bits. The value of the
 * two bits represents which subset to use (0 or 1).
 */
static const uint32_t
partition_table1[N_PARTITIONS] = {
   0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
   0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
   0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
   0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
   0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
   0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
   0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
   0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
   0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
   0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
   0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
   0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
   0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
   0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
   0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
   0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
};
  265.  
/* This partition table is used when the mode has three subsets. It is
 * indexed by the partition number read from the block and uses the same
 * 2-bits-per-texel packing as the two-subset table, with texel 0 in the two
 * least-significant bits. In this case the values can be 0, 1 or 2.
 */
static const uint32_t
partition_table2[N_PARTITIONS] = {
   0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
   0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
   0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
   0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
   0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
   0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
   0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
   0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
   0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
   0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
   0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
   0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
   0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
   0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
   0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
   0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
};
  288.  
/* Texel numbers of the anchor texels, indexed first by row (see the comment
 * on each row) and then by partition number. Texel 0 is always treated as
 * an anchor by the code and so is not listed here. An anchor texel's index
 * is stored in the block with one bit fewer than the other texels.
 */
static const uint8_t
anchor_indices[][N_PARTITIONS] = {
   /* Anchor index values for the second subset of two-subset partitioning */
   {
      0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
      0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
      0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
      0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
   },

   /* Anchor index values for the second subset of three-subset partitioning */
   {
      0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
      0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
      0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
      0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
   },

   /* Anchor index values for the third subset of three-subset
    * partitioning
    */
   {
      0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
      0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
      0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
      0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
   }
};
  317.  
  318. static int
  319. extract_bits(const uint8_t *block,
  320.              int offset,
  321.              int n_bits)
  322. {
  323.    int byte_index = offset / 8;
  324.    int bit_index = offset % 8;
  325.    int n_bits_in_byte = MIN2(n_bits, 8 - bit_index);
  326.    int result = 0;
  327.    int bit = 0;
  328.  
  329.    while (true) {
  330.       result |= ((block[byte_index] >> bit_index) &
  331.                  ((1 << n_bits_in_byte) - 1)) << bit;
  332.  
  333.       n_bits -= n_bits_in_byte;
  334.  
  335.       if (n_bits <= 0)
  336.          return result;
  337.  
  338.       bit += n_bits_in_byte;
  339.       byte_index++;
  340.       bit_index = 0;
  341.       n_bits_in_byte = MIN2(n_bits, 8);
  342.    }
  343. }
  344.  
  345. static uint8_t
  346. expand_component(uint8_t byte,
  347.                  int n_bits)
  348. {
  349.    /* Expands a n-bit quantity into a byte by copying the most-significant
  350.     * bits into the unused least-significant bits.
  351.     */
  352.    return byte << (8 - n_bits) | (byte >> (2 * n_bits - 8));
  353. }
  354.  
  355. static int
  356. extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
  357.                         const uint8_t *block,
  358.                         int bit_offset,
  359.                         uint8_t endpoints[][4])
  360. {
  361.    int component;
  362.    int subset;
  363.    int endpoint;
  364.    int pbit;
  365.    int n_components;
  366.  
  367.    /* Extract each color component */
  368.    for (component = 0; component < 3; component++) {
  369.       for (subset = 0; subset < mode->n_subsets; subset++) {
  370.          for (endpoint = 0; endpoint < 2; endpoint++) {
  371.             endpoints[subset * 2 + endpoint][component] =
  372.                extract_bits(block, bit_offset, mode->n_color_bits);
  373.             bit_offset += mode->n_color_bits;
  374.          }
  375.       }
  376.    }
  377.  
  378.    /* Extract the alpha values */
  379.    if (mode->n_alpha_bits > 0) {
  380.       for (subset = 0; subset < mode->n_subsets; subset++) {
  381.          for (endpoint = 0; endpoint < 2; endpoint++) {
  382.             endpoints[subset * 2 + endpoint][3] =
  383.                extract_bits(block, bit_offset, mode->n_alpha_bits);
  384.             bit_offset += mode->n_alpha_bits;
  385.          }
  386.       }
  387.  
  388.       n_components = 4;
  389.    } else {
  390.       for (subset = 0; subset < mode->n_subsets; subset++)
  391.          for (endpoint = 0; endpoint < 2; endpoint++)
  392.             endpoints[subset * 2 + endpoint][3] = 255;
  393.  
  394.       n_components = 3;
  395.    }
  396.  
  397.    /* Add in the p-bits */
  398.    if (mode->has_endpoint_pbits) {
  399.       for (subset = 0; subset < mode->n_subsets; subset++) {
  400.          for (endpoint = 0; endpoint < 2; endpoint++) {
  401.             pbit = extract_bits(block, bit_offset, 1);
  402.             bit_offset += 1;
  403.  
  404.             for (component = 0; component < n_components; component++) {
  405.                endpoints[subset * 2 + endpoint][component] <<= 1;
  406.                endpoints[subset * 2 + endpoint][component] |= pbit;
  407.             }
  408.          }
  409.       }
  410.    } else if (mode->has_shared_pbits) {
  411.       for (subset = 0; subset < mode->n_subsets; subset++) {
  412.          pbit = extract_bits(block, bit_offset, 1);
  413.          bit_offset += 1;
  414.  
  415.          for (endpoint = 0; endpoint < 2; endpoint++) {
  416.             for (component = 0; component < n_components; component++) {
  417.                endpoints[subset * 2 + endpoint][component] <<= 1;
  418.                endpoints[subset * 2 + endpoint][component] |= pbit;
  419.             }
  420.          }
  421.       }
  422.    }
  423.  
  424.    /* Expand the n-bit values to a byte */
  425.    for (subset = 0; subset < mode->n_subsets; subset++) {
  426.       for (endpoint = 0; endpoint < 2; endpoint++) {
  427.          for (component = 0; component < 3; component++) {
  428.             endpoints[subset * 2 + endpoint][component] =
  429.                expand_component(endpoints[subset * 2 + endpoint][component],
  430.                                 mode->n_color_bits +
  431.                                 mode->has_endpoint_pbits +
  432.                                 mode->has_shared_pbits);
  433.          }
  434.  
  435.          if (mode->n_alpha_bits > 0) {
  436.             endpoints[subset * 2 + endpoint][3] =
  437.                expand_component(endpoints[subset * 2 + endpoint][3],
  438.                                 mode->n_alpha_bits +
  439.                                 mode->has_endpoint_pbits +
  440.                                 mode->has_shared_pbits);
  441.          }
  442.       }
  443.    }
  444.  
  445.    return bit_offset;
  446. }
  447.  
  448. static bool
  449. is_anchor(int n_subsets,
  450.           int partition_num,
  451.           int texel)
  452. {
  453.    if (texel == 0)
  454.       return true;
  455.  
  456.    switch (n_subsets) {
  457.    case 1:
  458.       return false;
  459.    case 2:
  460.       return anchor_indices[0][partition_num] == texel;
  461.    case 3:
  462.       return (anchor_indices[1][partition_num] == texel ||
  463.               anchor_indices[2][partition_num] == texel);
  464.    default:
  465.       assert(false);
  466.       return false;
  467.    }
  468. }
  469.  
  470. static int
  471. count_anchors_before_texel(int n_subsets,
  472.                            int partition_num,
  473.                            int texel)
  474. {
  475.    int count = 1;
  476.  
  477.    if (texel == 0)
  478.       return 0;
  479.  
  480.    switch (n_subsets) {
  481.    case 1:
  482.       break;
  483.    case 2:
  484.       if (texel > anchor_indices[0][partition_num])
  485.          count++;
  486.       break;
  487.    case 3:
  488.       if (texel > anchor_indices[1][partition_num])
  489.          count++;
  490.       if (texel > anchor_indices[2][partition_num])
  491.          count++;
  492.       break;
  493.    default:
  494.       assert(false);
  495.       return 0;
  496.    }
  497.  
  498.    return count;
  499. }
  500.  
  501. static int32_t
  502. interpolate(int32_t a, int32_t b,
  503.             int index,
  504.             int index_bits)
  505. {
  506.    static const uint8_t weights2[] = { 0, 21, 43, 64 };
  507.    static const uint8_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
  508.    static const uint8_t weights4[] =
  509.       { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
  510.    static const uint8_t *weights[] = {
  511.       NULL, NULL, weights2, weights3, weights4
  512.    };
  513.    int weight;
  514.  
  515.    weight = weights[index_bits][index];
  516.  
  517.    return ((64 - weight) * a + weight * b + 32) >> 6;
  518. }
  519.  
  520. static void
  521. apply_rotation(int rotation,
  522.                uint8_t *result)
  523. {
  524.    uint8_t t;
  525.  
  526.    if (rotation == 0)
  527.       return;
  528.  
  529.    rotation--;
  530.  
  531.    t = result[rotation];
  532.    result[rotation] = result[3];
  533.    result[3] = t;
  534. }
  535.  
/**
 * Decode a single texel of a UNORM block.
 *
 * The block is parsed front to back: mode, partition number, optional
 * rotation and index selection fields, the endpoints, and finally the
 * per-texel indices. bit_offset tracks the parse position, so the order of
 * the statements below matters.
 *
 * \param block   the compressed block
 * \param result  receives the four 8-bit components of the texel
 * \param texel   texel number within the block (x + y * 4)
 */
static void
fetch_rgba_unorm_from_block(const uint8_t *block,
                            uint8_t *result,
                            int texel)
{
   /* The mode is given by the position of the lowest set bit of the first
    * byte; ffs() returns that position counting from 1, or 0 if no bit is
    * set.
    */
   int mode_num = ffs(block[0]);
   const struct bptc_unorm_mode *mode;
   int bit_offset, secondary_bit_offset;
   int partition_num;
   int subset_num;
   int rotation;
   int index_selection;
   int index_bits;
   int indices[2];
   int index;
   int anchors_before_texel;
   bool anchor;
   uint8_t endpoints[3 * 2][4];
   uint32_t subsets;
   int component;

   if (mode_num == 0) {
      /* According to the spec this mode is reserved and shouldn't be used. */
      memset(result, 0, 3);
      result[3] = 0xff;
      return;
   }

   mode = bptc_unorm_modes + mode_num - 1;
   /* Skip over the mode bits themselves */
   bit_offset = mode_num;

   partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
   bit_offset += mode->n_partition_bits;

   /* Pick the packed texel-to-subset map for this partition */
   switch (mode->n_subsets) {
   case 1:
      subsets = 0;
      break;
   case 2:
      subsets = partition_table1[partition_num];
      break;
   case 3:
      subsets = partition_table2[partition_num];
      break;
   default:
      assert(false);
      return;
   }

   if (mode->has_rotation_bits) {
      rotation = extract_bits(block, bit_offset, 2);
      bit_offset += 2;
   } else {
      rotation = 0;
   }

   if (mode->has_index_selection_bit) {
      index_selection = extract_bits(block, bit_offset, 1);
      bit_offset++;
   } else {
      index_selection = 0;
   }

   bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);

   /* Each anchor texel is stored with one bit fewer, so every anchor before
    * this texel moves the texel's index one bit closer to the start.
    */
   anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
                                                     partition_num, texel);

   /* Calculate the offset to the secondary index */
   secondary_bit_offset = (bit_offset +
                           BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
                           mode->n_subsets +
                           mode->n_secondary_index_bits * texel -
                           anchors_before_texel);

   /* Calculate the offset to the primary index for this texel */
   bit_offset += mode->n_index_bits * texel - anchors_before_texel;

   /* Two bits per texel select the subset */
   subset_num = (subsets >> (texel * 2)) & 3;

   anchor = is_anchor(mode->n_subsets, partition_num, texel);

   index_bits = mode->n_index_bits;
   if (anchor)
      index_bits--;
   indices[0] = extract_bits(block, bit_offset, index_bits);

   /* indices[1] is only set, and only read, for modes with a secondary
    * index set.
    */
   if (mode->n_secondary_index_bits) {
      index_bits = mode->n_secondary_index_bits;
      if (anchor)
         index_bits--;
      indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
   }

   /* The index selection bit chooses which index set the colour uses */
   index = indices[index_selection];
   index_bits = (index_selection ?
                 mode->n_secondary_index_bits :
                 mode->n_index_bits);

   for (component = 0; component < 3; component++)
      result[component] = interpolate(endpoints[subset_num * 2][component],
                                      endpoints[subset_num * 2 + 1][component],
                                      index,
                                      index_bits);

   /* Alpha uses the opposite index from the color components */
   if (mode->n_secondary_index_bits && !index_selection) {
      index = indices[1];
      index_bits = mode->n_secondary_index_bits;
   } else {
      index = indices[0];
      index_bits = mode->n_index_bits;
   }

   result[3] = interpolate(endpoints[subset_num * 2][3],
                           endpoints[subset_num * 2 + 1][3],
                           index,
                           index_bits);

   apply_rotation(rotation, result);
}
  657.  
  658. static void
  659. fetch_bptc_rgba_unorm_bytes(const GLubyte *map,
  660.                             GLint rowStride, GLint i, GLint j,
  661.                             GLubyte *texel)
  662. {
  663.    const GLubyte *block;
  664.  
  665.    block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
  666.  
  667.    fetch_rgba_unorm_from_block(block, texel, (i % 4) + (j % 4) * 4);
  668. }
  669.  
  670. static void
  671. fetch_bptc_rgba_unorm(const GLubyte *map,
  672.                       GLint rowStride, GLint i, GLint j,
  673.                       GLfloat *texel)
  674. {
  675.    GLubyte texel_bytes[4];
  676.  
  677.    fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
  678.  
  679.    texel[RCOMP] = UBYTE_TO_FLOAT(texel_bytes[0]);
  680.    texel[GCOMP] = UBYTE_TO_FLOAT(texel_bytes[1]);
  681.    texel[BCOMP] = UBYTE_TO_FLOAT(texel_bytes[2]);
  682.    texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
  683. }
  684.  
  685. static void
  686. fetch_bptc_srgb_alpha_unorm(const GLubyte *map,
  687.                             GLint rowStride, GLint i, GLint j,
  688.                             GLfloat *texel)
  689. {
  690.    GLubyte texel_bytes[4];
  691.  
  692.    fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
  693.  
  694.    texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[0]);
  695.    texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[1]);
  696.    texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[2]);
  697.    texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
  698. }
  699.  
  700. static int32_t
  701. sign_extend(int32_t value,
  702.             int n_bits)
  703. {
  704.    if ((value & (1 << (n_bits - 1)))) {
  705.       value |= (~(int32_t) 0) << n_bits;
  706.    }
  707.  
  708.    return value;
  709. }
  710.  
  711. static int
  712. signed_unquantize(int value, int n_endpoint_bits)
  713. {
  714.    bool sign;
  715.  
  716.    if (n_endpoint_bits >= 16)
  717.       return value;
  718.  
  719.    if (value == 0)
  720.       return 0;
  721.  
  722.    sign = false;
  723.  
  724.    if (value < 0) {
  725.       sign = true;
  726.       value = -value;
  727.    }
  728.  
  729.    if (value >= (1 << (n_endpoint_bits - 1)) - 1)
  730.       value = 0x7fff;
  731.    else
  732.       value = ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
  733.  
  734.    if (sign)
  735.       value = -value;
  736.  
  737.    return value;
  738. }
  739.  
  740. static int
  741. unsigned_unquantize(int value, int n_endpoint_bits)
  742. {
  743.    if (n_endpoint_bits >= 15)
  744.       return value;
  745.  
  746.    if (value == 0)
  747.       return 0;
  748.  
  749.    if (value == (1 << n_endpoint_bits) - 1)
  750.       return 0xffff;
  751.  
  752.    return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
  753. }
  754.  
  755. static int
  756. extract_float_endpoints(const struct bptc_float_mode *mode,
  757.                         const uint8_t *block,
  758.                         int bit_offset,
  759.                         int32_t endpoints[][3],
  760.                         bool is_signed)
  761. {
  762.    const struct bptc_float_bitfield *bitfield;
  763.    int endpoint, component;
  764.    int n_endpoints;
  765.    int value;
  766.    int i;
  767.  
  768.    if (mode->n_partition_bits)
  769.       n_endpoints = 4;
  770.    else
  771.       n_endpoints = 2;
  772.  
  773.    memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
  774.  
  775.    for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
  776.       value = extract_bits(block, bit_offset, bitfield->n_bits);
  777.       bit_offset += bitfield->n_bits;
  778.  
  779.       if (bitfield->reverse) {
  780.          for (i = 0; i < bitfield->n_bits; i++) {
  781.             if (value & (1 << i))
  782.                endpoints[bitfield->endpoint][bitfield->component] |=
  783.                   1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
  784.          }
  785.       } else {
  786.          endpoints[bitfield->endpoint][bitfield->component] |=
  787.             value << bitfield->offset;
  788.       }
  789.    }
  790.  
  791.    if (mode->transformed_endpoints) {
  792.       /* The endpoints are specified as signed offsets from e0 */
  793.       for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
  794.          for (component = 0; component < 3; component++) {
  795.             value = sign_extend(endpoints[endpoint][component],
  796.                                 mode->n_delta_bits[component]);
  797.             endpoints[endpoint][component] =
  798.                ((endpoints[0][component] + value) &
  799.                 ((1 << mode->n_endpoint_bits) - 1));
  800.          }
  801.       }
  802.    }
  803.  
  804.    if (is_signed) {
  805.       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
  806.          for (component = 0; component < 3; component++) {
  807.             value = sign_extend(endpoints[endpoint][component],
  808.                                 mode->n_endpoint_bits);
  809.             endpoints[endpoint][component] =
  810.                signed_unquantize(value, mode->n_endpoint_bits);
  811.          }
  812.       }
  813.    } else {
  814.       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
  815.          for (component = 0; component < 3; component++) {
  816.             endpoints[endpoint][component] =
  817.                unsigned_unquantize(endpoints[endpoint][component],
  818.                                    mode->n_endpoint_bits);
  819.          }
  820.       }
  821.    }
  822.  
  823.    return bit_offset;
  824. }
  825.  
  826. static int32_t
  827. finish_unsigned_unquantize(int32_t value)
  828. {
  829.    return value * 31 / 64;
  830. }
  831.  
  832. static int32_t
  833. finish_signed_unquantize(int32_t value)
  834. {
  835.    if (value < 0)
  836.       return (-value * 31 / 32) | 0x8000;
  837.    else
  838.       return value * 31 / 32;
  839. }
  840.  
  841. static void
  842. fetch_rgb_float_from_block(const uint8_t *block,
  843.                            float *result,
  844.                            int texel,
  845.                            bool is_signed)
  846. {
  847.    int mode_num;
  848.    const struct bptc_float_mode *mode;
  849.    int bit_offset;
  850.    int partition_num;
  851.    int subset_num;
  852.    int index_bits;
  853.    int index;
  854.    int anchors_before_texel;
  855.    int32_t endpoints[2 * 2][3];
  856.    uint32_t subsets;
  857.    int n_subsets;
  858.    int component;
  859.    int32_t value;
  860.  
  861.    if (block[0] & 0x2) {
  862.       mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
  863.       bit_offset = 5;
  864.    } else {
  865.       mode_num = block[0] & 3;
  866.       bit_offset = 2;
  867.    }
  868.  
  869.    mode = bptc_float_modes + mode_num;
  870.  
  871.    if (mode->reserved) {
  872.       memset(result, 0, sizeof result[0] * 3);
  873.       result[3] = 1.0f;
  874.       return;
  875.    }
  876.  
  877.    bit_offset = extract_float_endpoints(mode, block, bit_offset,
  878.                                         endpoints, is_signed);
  879.  
  880.    if (mode->n_partition_bits) {
  881.       partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
  882.       bit_offset += mode->n_partition_bits;
  883.  
  884.       subsets = partition_table1[partition_num];
  885.       n_subsets = 2;
  886.    } else {
  887.       partition_num = 0;
  888.       subsets = 0;
  889.       n_subsets = 1;
  890.    }
  891.  
  892.    anchors_before_texel =
  893.       count_anchors_before_texel(n_subsets, partition_num, texel);
  894.  
  895.    /* Calculate the offset to the primary index for this texel */
  896.    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
  897.  
  898.    subset_num = (subsets >> (texel * 2)) & 3;
  899.  
  900.    index_bits = mode->n_index_bits;
  901.    if (is_anchor(n_subsets, partition_num, texel))
  902.       index_bits--;
  903.    index = extract_bits(block, bit_offset, index_bits);
  904.  
  905.    for (component = 0; component < 3; component++) {
  906.       value = interpolate(endpoints[subset_num * 2][component],
  907.                           endpoints[subset_num * 2 + 1][component],
  908.                           index,
  909.                           mode->n_index_bits);
  910.  
  911.       if (is_signed)
  912.          value = finish_signed_unquantize(value);
  913.       else
  914.          value = finish_unsigned_unquantize(value);
  915.  
  916.       result[component] = _mesa_half_to_float(value);
  917.    }
  918.  
  919.    result[3] = 1.0f;
  920. }
  921.  
  922. static void
  923. fetch_bptc_rgb_float(const GLubyte *map,
  924.                      GLint rowStride, GLint i, GLint j,
  925.                      GLfloat *texel,
  926.                      bool is_signed)
  927. {
  928.    const GLubyte *block;
  929.  
  930.    block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
  931.  
  932.    fetch_rgb_float_from_block(block, texel, (i % 4) + (j % 4) * 4, is_signed);
  933. }
  934.  
  935. static void
  936. fetch_bptc_rgb_signed_float(const GLubyte *map,
  937.                             GLint rowStride, GLint i, GLint j,
  938.                             GLfloat *texel)
  939. {
  940.    fetch_bptc_rgb_float(map, rowStride, i, j, texel, true);
  941. }
  942.  
  943. static void
  944. fetch_bptc_rgb_unsigned_float(const GLubyte *map,
  945.                               GLint rowStride, GLint i, GLint j,
  946.                               GLfloat *texel)
  947. {
  948.    fetch_bptc_rgb_float(map, rowStride, i, j, texel, false);
  949. }
  950.  
  951. compressed_fetch_func
  952. _mesa_get_bptc_fetch_func(mesa_format format)
  953. {
  954.    switch (format) {
  955.    case MESA_FORMAT_BPTC_RGBA_UNORM:
  956.       return fetch_bptc_rgba_unorm;
  957.    case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM:
  958.       return fetch_bptc_srgb_alpha_unorm;
  959.    case MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT:
  960.       return fetch_bptc_rgb_signed_float;
  961.    case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT:
  962.       return fetch_bptc_rgb_unsigned_float;
  963.    default:
  964.       return NULL;
  965.    }
  966. }
  967.  
  968. static void
  969. write_bits(struct bit_writer *writer, int n_bits, int value)
  970. {
  971.    do {
  972.       if (n_bits + writer->pos >= 8) {
  973.          *(writer->dst++) = writer->buf | (value << writer->pos);
  974.          writer->buf = 0;
  975.          value >>= (8 - writer->pos);
  976.          n_bits -= (8 - writer->pos);
  977.          writer->pos = 0;
  978.       } else {
  979.          writer->buf |= value << writer->pos;
  980.          writer->pos += n_bits;
  981.          break;
  982.       }
  983.    } while (n_bits > 0);
  984. }
  985.  
  986. static void
  987. get_average_luminance_alpha_unorm(int width, int height,
  988.                                   const uint8_t *src, int src_rowstride,
  989.                                   int *average_luminance, int *average_alpha)
  990. {
  991.    int luminance_sum = 0, alpha_sum = 0;
  992.    int y, x;
  993.  
  994.    for (y = 0; y < height; y++) {
  995.       for (x = 0; x < width; x++) {
  996.          luminance_sum += src[0] + src[1] + src[2];
  997.          alpha_sum += src[3];
  998.          src += 4;
  999.       }
  1000.       src += src_rowstride - width * 4;
  1001.    }
  1002.  
  1003.    *average_luminance = luminance_sum / (width * height);
  1004.    *average_alpha = alpha_sum / (width * height);
  1005. }
  1006.  
  1007. static void
  1008. get_rgba_endpoints_unorm(int width, int height,
  1009.                          const uint8_t *src, int src_rowstride,
  1010.                          int average_luminance, int average_alpha,
  1011.                          uint8_t endpoints[][4])
  1012. {
  1013.    int endpoint_luminances[2];
  1014.    int midpoint;
  1015.    int sums[2][4];
  1016.    int endpoint;
  1017.    int luminance;
  1018.    uint8_t temp[3];
  1019.    const uint8_t *p = src;
  1020.    int rgb_left_endpoint_count = 0;
  1021.    int alpha_left_endpoint_count = 0;
  1022.    int y, x, i;
  1023.  
  1024.    memset(sums, 0, sizeof sums);
  1025.  
  1026.    for (y = 0; y < height; y++) {
  1027.       for (x = 0; x < width; x++) {
  1028.          luminance = p[0] + p[1] + p[2];
  1029.          if (luminance < average_luminance) {
  1030.             endpoint = 0;
  1031.             rgb_left_endpoint_count++;
  1032.          } else {
  1033.             endpoint = 1;
  1034.          }
  1035.          for (i = 0; i < 3; i++)
  1036.             sums[endpoint][i] += p[i];
  1037.  
  1038.          if (p[2] < average_alpha) {
  1039.             endpoint = 0;
  1040.             alpha_left_endpoint_count++;
  1041.          } else {
  1042.             endpoint = 1;
  1043.          }
  1044.          sums[endpoint][3] += p[3];
  1045.  
  1046.          p += 4;
  1047.       }
  1048.  
  1049.       p += src_rowstride - width * 4;
  1050.    }
  1051.  
  1052.    if (rgb_left_endpoint_count == 0 ||
  1053.        rgb_left_endpoint_count == width * height) {
  1054.       for (i = 0; i < 3; i++)
  1055.          endpoints[0][i] = endpoints[1][i] =
  1056.             (sums[0][i] + sums[1][i]) / (width * height);
  1057.    } else {
  1058.       for (i = 0; i < 3; i++) {
  1059.          endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
  1060.          endpoints[1][i] = (sums[1][i] /
  1061.                             (width * height - rgb_left_endpoint_count));
  1062.       }
  1063.    }
  1064.  
  1065.    if (alpha_left_endpoint_count == 0 ||
  1066.        alpha_left_endpoint_count == width * height) {
  1067.       endpoints[0][3] = endpoints[1][3] =
  1068.          (sums[0][3] + sums[1][3]) / (width * height);
  1069.    } else {
  1070.          endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
  1071.          endpoints[1][3] = (sums[1][3] /
  1072.                             (width * height - alpha_left_endpoint_count));
  1073.    }
  1074.  
  1075.    /* We may need to swap the endpoints to ensure the most-significant bit of
  1076.     * the first index is zero */
  1077.  
  1078.    for (endpoint = 0; endpoint < 2; endpoint++) {
  1079.       endpoint_luminances[endpoint] =
  1080.          endpoints[endpoint][0] +
  1081.          endpoints[endpoint][1] +
  1082.          endpoints[endpoint][2];
  1083.    }
  1084.    midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;
  1085.  
  1086.    if ((src[0] + src[1] + src[2] <= midpoint) !=
  1087.        (endpoint_luminances[0] <= midpoint)) {
  1088.       memcpy(temp, endpoints[0], 3);
  1089.       memcpy(endpoints[0], endpoints[1], 3);
  1090.       memcpy(endpoints[1], temp, 3);
  1091.    }
  1092.  
  1093.    /* Same for the alpha endpoints */
  1094.  
  1095.    midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;
  1096.  
  1097.    if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
  1098.       temp[0] = endpoints[0][3];
  1099.       endpoints[0][3] = endpoints[1][3];
  1100.       endpoints[1][3] = temp[0];
  1101.    }
  1102. }
  1103.  
  1104. static void
  1105. write_rgb_indices_unorm(struct bit_writer *writer,
  1106.                         int src_width, int src_height,
  1107.                         const uint8_t *src, int src_rowstride,
  1108.                         uint8_t endpoints[][4])
  1109. {
  1110.    int luminance;
  1111.    int endpoint_luminances[2];
  1112.    int endpoint;
  1113.    int index;
  1114.    int y, x;
  1115.  
  1116.    for (endpoint = 0; endpoint < 2; endpoint++) {
  1117.       endpoint_luminances[endpoint] =
  1118.          endpoints[endpoint][0] +
  1119.          endpoints[endpoint][1] +
  1120.          endpoints[endpoint][2];
  1121.    }
  1122.  
  1123.    /* If the endpoints have the same luminance then we'll just use index 0 for
  1124.     * all of the texels */
  1125.    if (endpoint_luminances[0] == endpoint_luminances[1]) {
  1126.       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
  1127.       return;
  1128.    }
  1129.  
  1130.    for (y = 0; y < src_height; y++) {
  1131.       for (x = 0; x < src_width; x++) {
  1132.          luminance = src[0] + src[1] + src[2];
  1133.  
  1134.          index = ((luminance - endpoint_luminances[0]) * 3 /
  1135.                   (endpoint_luminances[1] - endpoint_luminances[0]));
  1136.          if (index < 0)
  1137.             index = 0;
  1138.          else if (index > 3)
  1139.             index = 3;
  1140.  
  1141.          assert(x != 0 || y != 0 || index < 2);
  1142.  
  1143.          write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
  1144.  
  1145.          src += 4;
  1146.       }
  1147.  
  1148.       /* Pad the indices out to the block size */
  1149.       if (src_width < BLOCK_SIZE)
  1150.          write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
  1151.  
  1152.       src += src_rowstride - src_width * 4;
  1153.    }
  1154.  
  1155.    /* Pad the indices out to the block size */
  1156.    if (src_height < BLOCK_SIZE)
  1157.       write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
  1158. }
  1159.  
  1160. static void
  1161. write_alpha_indices_unorm(struct bit_writer *writer,
  1162.                           int src_width, int src_height,
  1163.                           const uint8_t *src, int src_rowstride,
  1164.                           uint8_t endpoints[][4])
  1165. {
  1166.    int index;
  1167.    int y, x;
  1168.  
  1169.    /* If the endpoints have the same alpha then we'll just use index 0 for
  1170.     * all of the texels */
  1171.    if (endpoints[0][3] == endpoints[1][3]) {
  1172.       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
  1173.       return;
  1174.    }
  1175.  
  1176.    for (y = 0; y < src_height; y++) {
  1177.       for (x = 0; x < src_width; x++) {
  1178.          index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
  1179.                   ((int) endpoints[1][3] - endpoints[0][3]));
  1180.          if (index < 0)
  1181.             index = 0;
  1182.          else if (index > 7)
  1183.             index = 7;
  1184.  
  1185.          assert(x != 0 || y != 0 || index < 4);
  1186.  
  1187.          /* The first index has one less bit */
  1188.          write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
  1189.  
  1190.          src += 4;
  1191.       }
  1192.  
  1193.       /* Pad the indices out to the block size */
  1194.       if (src_width < BLOCK_SIZE)
  1195.          write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
  1196.  
  1197.       src += src_rowstride - src_width * 4;
  1198.    }
  1199.  
  1200.    /* Pad the indices out to the block size */
  1201.    if (src_height < BLOCK_SIZE)
  1202.       write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
  1203. }
  1204.  
  1205. static void
  1206. compress_rgba_unorm_block(int src_width, int src_height,
  1207.                           const uint8_t *src, int src_rowstride,
  1208.                           uint8_t *dst)
  1209. {
  1210.    int average_luminance, average_alpha;
  1211.    uint8_t endpoints[2][4];
  1212.    struct bit_writer writer;
  1213.    int component, endpoint;
  1214.  
  1215.    get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
  1216.                                      &average_luminance, &average_alpha);
  1217.    get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
  1218.                             average_luminance, average_alpha,
  1219.                             endpoints);
  1220.  
  1221.    writer.dst = dst;
  1222.    writer.pos = 0;
  1223.    writer.buf = 0;
  1224.  
  1225.    write_bits(&writer, 5, 0x10); /* mode 4 */
  1226.    write_bits(&writer, 2, 0); /* rotation 0 */
  1227.    write_bits(&writer, 1, 0); /* index selection bit */
  1228.  
  1229.    /* Write the color endpoints */
  1230.    for (component = 0; component < 3; component++)
  1231.       for (endpoint = 0; endpoint < 2; endpoint++)
  1232.          write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
  1233.  
  1234.    /* Write the alpha endpoints */
  1235.    for (endpoint = 0; endpoint < 2; endpoint++)
  1236.       write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
  1237.  
  1238.    write_rgb_indices_unorm(&writer,
  1239.                            src_width, src_height,
  1240.                            src, src_rowstride,
  1241.                            endpoints);
  1242.    write_alpha_indices_unorm(&writer,
  1243.                              src_width, src_height,
  1244.                              src, src_rowstride,
  1245.                              endpoints);
  1246. }
  1247.  
  1248. static void
  1249. compress_rgba_unorm(int width, int height,
  1250.                     const uint8_t *src, int src_rowstride,
  1251.                     uint8_t *dst, int dst_rowstride)
  1252. {
  1253.    int dst_row_diff;
  1254.    int y, x;
  1255.  
  1256.    if (dst_rowstride >= width * 4)
  1257.       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
  1258.    else
  1259.       dst_row_diff = 0;
  1260.  
  1261.    for (y = 0; y < height; y += BLOCK_SIZE) {
  1262.       for (x = 0; x < width; x += BLOCK_SIZE) {
  1263.          compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
  1264.                                    MIN2(height - y, BLOCK_SIZE),
  1265.                                    src + x * 4 + y * src_rowstride,
  1266.                                    src_rowstride,
  1267.                                    dst);
  1268.          dst += BLOCK_BYTES;
  1269.       }
  1270.       dst += dst_row_diff;
  1271.    }
  1272. }
  1273.  
  1274. GLboolean
  1275. _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
  1276. {
  1277.    const GLubyte *pixels;
  1278.    const GLubyte *tempImage = NULL;
  1279.    int rowstride;
  1280.  
  1281.    if (srcFormat != GL_RGBA ||
  1282.        srcType != GL_UNSIGNED_BYTE ||
  1283.        ctx->_ImageTransferState ||
  1284.        srcPacking->SwapBytes) {
  1285.       /* convert image to RGBA/ubyte */
  1286.       GLubyte *tempImageSlices[1];
  1287.       int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
  1288.       tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
  1289.       if (!tempImage)
  1290.          return GL_FALSE; /* out of memory */
  1291.       tempImageSlices[0] = (GLubyte *) tempImage;
  1292.       _mesa_texstore(ctx, dims,
  1293.                      baseInternalFormat,
  1294.                      MESA_FORMAT_R8G8B8A8_UNORM,
  1295.                      rgbaRowStride, tempImageSlices,
  1296.                      srcWidth, srcHeight, srcDepth,
  1297.                      srcFormat, srcType, srcAddr,
  1298.                      srcPacking);
  1299.  
  1300.       pixels = tempImage;
  1301.       rowstride = srcWidth * 4;
  1302.    } else {
  1303.       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
  1304.                                      srcFormat, srcType, 0, 0);
  1305.       rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
  1306.                                          srcFormat, srcType);
  1307.    }
  1308.  
  1309.    compress_rgba_unorm(srcWidth, srcHeight,
  1310.                        pixels, rowstride,
  1311.                        dstSlices[0], dstRowStride);
  1312.  
  1313.    free((void *) tempImage);
  1314.  
  1315.    return GL_TRUE;
  1316. }
  1317.  
  1318. static float
  1319. get_average_luminance_float(int width, int height,
  1320.                             const float *src, int src_rowstride)
  1321. {
  1322.    float luminance_sum = 0;
  1323.    int y, x;
  1324.  
  1325.    for (y = 0; y < height; y++) {
  1326.       for (x = 0; x < width; x++) {
  1327.          luminance_sum += src[0] + src[1] + src[2];
  1328.          src += 3;
  1329.       }
  1330.       src += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
  1331.    }
  1332.  
  1333.    return luminance_sum / (width * height);
  1334. }
  1335.  
  1336. static float
  1337. clamp_value(float value, bool is_signed)
  1338. {
  1339.    if (value > 65504.0f)
  1340.       return 65504.0f;
  1341.  
  1342.    if (is_signed) {
  1343.       if (value < -65504.0f)
  1344.          return -65504.0f;
  1345.       else
  1346.          return value;
  1347.    }
  1348.  
  1349.    if (value < 0.0f)
  1350.       return 0.0f;
  1351.  
  1352.    return value;
  1353. }
  1354.  
  1355. static void
  1356. get_endpoints_float(int width, int height,
  1357.                     const float *src, int src_rowstride,
  1358.                     float average_luminance, float endpoints[][3],
  1359.                     bool is_signed)
  1360. {
  1361.    float endpoint_luminances[2];
  1362.    float midpoint;
  1363.    float sums[2][3];
  1364.    int endpoint, component;
  1365.    float luminance;
  1366.    float temp[3];
  1367.    const float *p = src;
  1368.    int left_endpoint_count = 0;
  1369.    int y, x, i;
  1370.  
  1371.    memset(sums, 0, sizeof sums);
  1372.  
  1373.    for (y = 0; y < height; y++) {
  1374.       for (x = 0; x < width; x++) {
  1375.          luminance = p[0] + p[1] + p[2];
  1376.          if (luminance < average_luminance) {
  1377.             endpoint = 0;
  1378.             left_endpoint_count++;
  1379.          } else {
  1380.             endpoint = 1;
  1381.          }
  1382.          for (i = 0; i < 3; i++)
  1383.             sums[endpoint][i] += p[i];
  1384.  
  1385.          p += 3;
  1386.       }
  1387.  
  1388.       p += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
  1389.    }
  1390.  
  1391.    if (left_endpoint_count == 0 ||
  1392.        left_endpoint_count == width * height) {
  1393.       for (i = 0; i < 3; i++)
  1394.          endpoints[0][i] = endpoints[1][i] =
  1395.             (sums[0][i] + sums[1][i]) / (width * height);
  1396.    } else {
  1397.       for (i = 0; i < 3; i++) {
  1398.          endpoints[0][i] = sums[0][i] / left_endpoint_count;
  1399.          endpoints[1][i] = sums[1][i] / (width * height - left_endpoint_count);
  1400.       }
  1401.    }
  1402.  
  1403.    /* Clamp the endpoints to the range of a half float and strip out
  1404.     * infinities */
  1405.    for (endpoint = 0; endpoint < 2; endpoint++) {
  1406.       for (component = 0; component < 3; component++) {
  1407.          endpoints[endpoint][component] =
  1408.             clamp_value(endpoints[endpoint][component], is_signed);
  1409.       }
  1410.    }
  1411.  
  1412.    /* We may need to swap the endpoints to ensure the most-significant bit of
  1413.     * the first index is zero */
  1414.  
  1415.    for (endpoint = 0; endpoint < 2; endpoint++) {
  1416.       endpoint_luminances[endpoint] =
  1417.          endpoints[endpoint][0] +
  1418.          endpoints[endpoint][1] +
  1419.          endpoints[endpoint][2];
  1420.    }
  1421.    midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f;
  1422.  
  1423.    if ((src[0] + src[1] + src[2] <= midpoint) !=
  1424.        (endpoint_luminances[0] <= midpoint)) {
  1425.       memcpy(temp, endpoints[0], sizeof temp);
  1426.       memcpy(endpoints[0], endpoints[1], sizeof temp);
  1427.       memcpy(endpoints[1], temp, sizeof temp);
  1428.    }
  1429. }
  1430.  
  1431. static void
  1432. write_rgb_indices_float(struct bit_writer *writer,
  1433.                         int src_width, int src_height,
  1434.                         const float *src, int src_rowstride,
  1435.                         float endpoints[][3])
  1436. {
  1437.    float luminance;
  1438.    float endpoint_luminances[2];
  1439.    int endpoint;
  1440.    int index;
  1441.    int y, x;
  1442.  
  1443.    for (endpoint = 0; endpoint < 2; endpoint++) {
  1444.       endpoint_luminances[endpoint] =
  1445.          endpoints[endpoint][0] +
  1446.          endpoints[endpoint][1] +
  1447.          endpoints[endpoint][2];
  1448.    }
  1449.  
  1450.    /* If the endpoints have the same luminance then we'll just use index 0 for
  1451.     * all of the texels */
  1452.    if (endpoint_luminances[0] == endpoint_luminances[1]) {
  1453.       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
  1454.       return;
  1455.    }
  1456.  
  1457.    for (y = 0; y < src_height; y++) {
  1458.       for (x = 0; x < src_width; x++) {
  1459.          luminance = src[0] + src[1] + src[2];
  1460.  
  1461.          index = ((luminance - endpoint_luminances[0]) * 15 /
  1462.                   (endpoint_luminances[1] - endpoint_luminances[0]));
  1463.          if (index < 0)
  1464.             index = 0;
  1465.          else if (index > 15)
  1466.             index = 15;
  1467.  
  1468.          assert(x != 0 || y != 0 || index < 8);
  1469.  
  1470.          write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
  1471.  
  1472.          src += 3;
  1473.       }
  1474.  
  1475.       /* Pad the indices out to the block size */
  1476.       if (src_width < BLOCK_SIZE)
  1477.          write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
  1478.  
  1479.       src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
  1480.    }
  1481.  
  1482.    /* Pad the indices out to the block size */
  1483.    if (src_height < BLOCK_SIZE)
  1484.       write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
  1485. }
  1486.  
  1487. static int
  1488. get_endpoint_value(float value, bool is_signed)
  1489. {
  1490.    bool sign = false;
  1491.    int half;
  1492.  
  1493.    if (is_signed) {
  1494.       half = _mesa_float_to_half(value);
  1495.  
  1496.       if (half & 0x8000) {
  1497.          half &= 0x7fff;
  1498.          sign = true;
  1499.       }
  1500.  
  1501.       half = (32 * half / 31) >> 6;
  1502.  
  1503.       if (sign)
  1504.          half = -half & ((1 << 10) - 1);
  1505.  
  1506.       return half;
  1507.    } else {
  1508.       if (value <= 0.0f)
  1509.          return 0;
  1510.  
  1511.       half = _mesa_float_to_half(value);
  1512.  
  1513.       return (64 * half / 31) >> 6;
  1514.    }
  1515. }
  1516.  
  1517. static void
  1518. compress_rgb_float_block(int src_width, int src_height,
  1519.                          const float *src, int src_rowstride,
  1520.                          uint8_t *dst,
  1521.                          bool is_signed)
  1522. {
  1523.    float average_luminance;
  1524.    float endpoints[2][3];
  1525.    struct bit_writer writer;
  1526.    int component, endpoint;
  1527.    int endpoint_value;
  1528.  
  1529.    average_luminance =
  1530.       get_average_luminance_float(src_width, src_height, src, src_rowstride);
  1531.    get_endpoints_float(src_width, src_height, src, src_rowstride,
  1532.                        average_luminance, endpoints, is_signed);
  1533.  
  1534.    writer.dst = dst;
  1535.    writer.pos = 0;
  1536.    writer.buf = 0;
  1537.  
  1538.    write_bits(&writer, 5, 3); /* mode 3 */
  1539.  
  1540.    /* Write the endpoints */
  1541.    for (endpoint = 0; endpoint < 2; endpoint++) {
  1542.       for (component = 0; component < 3; component++) {
  1543.          endpoint_value =
  1544.             get_endpoint_value(endpoints[endpoint][component], is_signed);
  1545.          write_bits(&writer, 10, endpoint_value);
  1546.       }
  1547.    }
  1548.  
  1549.    write_rgb_indices_float(&writer,
  1550.                            src_width, src_height,
  1551.                            src, src_rowstride,
  1552.                            endpoints);
  1553. }
  1554.  
  1555. static void
  1556. compress_rgb_float(int width, int height,
  1557.                    const float *src, int src_rowstride,
  1558.                    uint8_t *dst, int dst_rowstride,
  1559.                    bool is_signed)
  1560. {
  1561.    int dst_row_diff;
  1562.    int y, x;
  1563.  
  1564.    if (dst_rowstride >= width * 4)
  1565.       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
  1566.    else
  1567.       dst_row_diff = 0;
  1568.  
  1569.    for (y = 0; y < height; y += BLOCK_SIZE) {
  1570.       for (x = 0; x < width; x += BLOCK_SIZE) {
  1571.          compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
  1572.                                   MIN2(height - y, BLOCK_SIZE),
  1573.                                   src + x * 3 +
  1574.                                   y * src_rowstride / sizeof (float),
  1575.                                   src_rowstride,
  1576.                                   dst,
  1577.                                   is_signed);
  1578.          dst += BLOCK_BYTES;
  1579.       }
  1580.       dst += dst_row_diff;
  1581.    }
  1582. }
  1583.  
  1584. static GLboolean
  1585. texstore_bptc_rgb_float(TEXSTORE_PARAMS,
  1586.                         bool is_signed)
  1587. {
  1588.    const float *pixels;
  1589.    const float *tempImage = NULL;
  1590.    int rowstride;
  1591.  
  1592.    if (srcFormat != GL_RGB ||
  1593.        srcType != GL_FLOAT ||
  1594.        ctx->_ImageTransferState ||
  1595.        srcPacking->SwapBytes) {
  1596.       /* convert image to RGB/float */
  1597.       GLfloat *tempImageSlices[1];
  1598.       int rgbRowStride = 3 * srcWidth * sizeof(GLfloat);
  1599.       tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLfloat));
  1600.       if (!tempImage)
  1601.          return GL_FALSE; /* out of memory */
  1602.       tempImageSlices[0] = (GLfloat *) tempImage;
  1603.       _mesa_texstore(ctx, dims,
  1604.                      baseInternalFormat,
  1605.                      MESA_FORMAT_RGB_FLOAT32,
  1606.                      rgbRowStride, (GLubyte **)tempImageSlices,
  1607.                      srcWidth, srcHeight, srcDepth,
  1608.                      srcFormat, srcType, srcAddr,
  1609.                      srcPacking);
  1610.  
  1611.       pixels = tempImage;
  1612.       rowstride = srcWidth * sizeof(float) * 3;
  1613.    } else {
  1614.       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
  1615.                                      srcFormat, srcType, 0, 0);
  1616.       rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
  1617.                                          srcFormat, srcType);
  1618.    }
  1619.  
  1620.    compress_rgb_float(srcWidth, srcHeight,
  1621.                       pixels, rowstride,
  1622.                       dstSlices[0], dstRowStride,
  1623.                       is_signed);
  1624.  
  1625.    free((void *) tempImage);
  1626.  
  1627.    return GL_TRUE;
  1628. }
  1629.  
  1630. GLboolean
  1631. _mesa_texstore_bptc_rgb_signed_float(TEXSTORE_PARAMS)
  1632. {
  1633.    assert(dstFormat == MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT);
  1634.  
  1635.    return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
  1636.                                   dstFormat, dstRowStride, dstSlices,
  1637.                                   srcWidth, srcHeight, srcDepth,
  1638.                                   srcFormat, srcType,
  1639.                                   srcAddr, srcPacking,
  1640.                                   true /* signed */);
  1641. }
  1642.  
  1643. GLboolean
  1644. _mesa_texstore_bptc_rgb_unsigned_float(TEXSTORE_PARAMS)
  1645. {
  1646.    assert(dstFormat == MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT);
  1647.  
  1648.    return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
  1649.                                   dstFormat, dstRowStride, dstSlices,
  1650.                                   srcWidth, srcHeight, srcDepth,
  1651.                                   srcFormat, srcType,
  1652.                                   srcAddr, srcPacking,
  1653.                                   false /* unsigned */);
  1654. }
  1655.