Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/*
2
 * Mesa 3-D graphics library
3
 *
4
 * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the "Software"),
8
 * to deal in the Software without restriction, including without limitation
9
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
 * and/or sell copies of the Software, and to permit persons to whom the
11
 * Software is furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included
14
 * in all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
 * OTHER DEALINGS IN THE SOFTWARE.
23
 */
24
 
25
 
26
/**
27
 * \file texcompress_fxt1.c
28
 * GL_3DFX_texture_compression_FXT1 support.
29
 */
30
 
31
 
32
#include "glheader.h"
33
#include "imports.h"
34
#include "colormac.h"
35
#include "image.h"
36
#include "macros.h"
37
#include "mipmap.h"
38
#include "texcompress.h"
39
#include "texcompress_fxt1.h"
40
#include "texstore.h"
41
 
42
 
43
static void
44
fxt1_encode (GLuint width, GLuint height, GLint comps,
45
             const void *source, GLint srcRowStride,
46
             void *dest, GLint destRowStride);
47
 
48
static void
49
fxt1_decode_1 (const void *texture, GLint stride,
50
               GLint i, GLint j, GLubyte *rgba);
51
 
52
 
53
/**
54
 * Store user's image in rgb_fxt1 format.
55
 */
56
GLboolean
57
_mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
58
{
59
   const GLubyte *pixels;
60
   GLint srcRowStride;
61
   GLubyte *dst;
62
   const GLubyte *tempImage = NULL;
63
 
64
   ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
65
 
66
   if (srcFormat != GL_RGB ||
67
       srcType != GL_UNSIGNED_BYTE ||
68
       ctx->_ImageTransferState ||
69
       srcPacking->RowLength != srcWidth ||
70
       srcPacking->SwapBytes) {
71
      /* convert image to RGB/GLubyte */
72
      tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
73
                                             baseInternalFormat,
74
                                             _mesa_get_format_base_format(dstFormat),
75
                                             srcWidth, srcHeight, srcDepth,
76
                                             srcFormat, srcType, srcAddr,
77
                                             srcPacking);
78
      if (!tempImage)
79
         return GL_FALSE; /* out of memory */
80
      pixels = tempImage;
81
      srcRowStride = 3 * srcWidth;
82
      srcFormat = GL_RGB;
83
   }
84
   else {
85
      pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
86
                                     srcFormat, srcType, 0, 0);
87
 
88
      srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
89
                                            srcType) / sizeof(GLubyte);
90
   }
91
 
92
   dst = dstSlices[0];
93
 
94
   fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
95
               dst, dstRowStride);
96
 
97
   free((void*) tempImage);
98
 
99
   return GL_TRUE;
100
}
101
 
102
 
103
/**
104
 * Store user's image in rgba_fxt1 format.
105
 */
106
GLboolean
107
_mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
108
{
109
   const GLubyte *pixels;
110
   GLint srcRowStride;
111
   GLubyte *dst;
112
   const GLubyte *tempImage = NULL;
113
 
114
   ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
115
 
116
   if (srcFormat != GL_RGBA ||
117
       srcType != GL_UNSIGNED_BYTE ||
118
       ctx->_ImageTransferState ||
119
       srcPacking->SwapBytes) {
120
      /* convert image to RGBA/GLubyte */
121
      tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
122
                                             baseInternalFormat,
123
                                             _mesa_get_format_base_format(dstFormat),
124
                                             srcWidth, srcHeight, srcDepth,
125
                                             srcFormat, srcType, srcAddr,
126
                                             srcPacking);
127
      if (!tempImage)
128
         return GL_FALSE; /* out of memory */
129
      pixels = tempImage;
130
      srcRowStride = 4 * srcWidth;
131
      srcFormat = GL_RGBA;
132
   }
133
   else {
134
      pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
135
                                     srcFormat, srcType, 0, 0);
136
 
137
      srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
138
                                            srcType) / sizeof(GLubyte);
139
   }
140
 
141
   dst = dstSlices[0];
142
 
143
   fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
144
               dst, dstRowStride);
145
 
146
   free((void*) tempImage);
147
 
148
   return GL_TRUE;
149
}
150
 
151
 
152
/***************************************************************************\
153
 * FXT1 encoder
154
 *
155
 * The encoder was built by reversing the decoder,
156
 * and is vaguely based on Texus2 by 3dfx. Note that this code
157
 * is merely a proof of concept, since it is highly UNoptimized;
158
 * moreover, it is sub-optimal due to initial conditions passed
159
 * to Lloyd's algorithm (the interpolation modes are even worse).
160
\***************************************************************************/
161
 
162
 
163
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * Non-zero when all four components of texel v are zero ("transparent
 * black").  v must be indexable as four bytes, e.g. one GLubyte[MAX_COMP]
 * row of a texel array.
 *
 * The components are tested byte by byte instead of through a GLuint
 * cast: a GLubyte array is not guaranteed to be GLuint-aligned and may
 * not legally be read through a GLuint lvalue.
 */
#define ISTBLACK(v) (((v)[0] | (v)[1] | (v)[2] | (v)[3]) == 0)
171
 
172
 
173
/*
 * Define a 64-bit unsigned integer type and macros.
 *
 * FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 * FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 * FX64_SHL(a, c)    shift a left by c bits (0 <= c < 64)
 *
 * Every argument is parenthesized so that arbitrary expressions can be
 * passed safely.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear .hi as well, so that a following FX64_SHL never reads garbage */
#define FX64_MOV32(a, b) ((a).hi = 0, (a).lo = (b))
#define FX64_OR32(a, b)  ((a).lo |= (b))

/* a shift count of 0 is a no-op; it must not reach the "lo >> 32" term */
#define FX64_SHL(a, c)                                             \
   do {                                                            \
       if ((c) >= 32) {                                            \
          (a).hi = (a).lo << ((c) - 32);                           \
          (a).lo = 0;                                              \
       } else if ((c) > 0) {                                       \
          (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));       \
          (a).lo <<= (c);                                          \
       }                                                           \
   } while (0)

#endif
209
 
210
 
211
#define F(i) ((GLfloat)1) /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * MAKEIVEC(NV, NC, IV, B, V0, V1)
 *
 * Compute the interpolation vector IV[0..NC-1] and bias B such that
 * dot(texel, IV) + B, truncated to an integer, maps V0 to index 0 and
 * V1 to index NV.
 *
 * The macro uses (and clobbers) a caller-provided integer variable `i'.
 * When V0 and V1 coincide the squared length d2 is zero; the scale is
 * then forced to 0 so that IV is all zeros and B is 0.5 (every texel maps
 * to index 0) instead of dividing by zero and producing inf/NaN.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)                 \
   do {                                                 \
      /* compute interpolation vector */                \
      GLfloat d2 = 0.0F;                                \
      GLfloat rd2;                                      \
                                                        \
      for (i = 0; i < (NC); i++) {                      \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i);          \
         d2 += (IV)[i] * (IV)[i];                       \
      }                                                 \
      rd2 = (d2 > 0.0F) ? (GLfloat)(NV) / d2 : 0.0F;    \
      (B) = 0;                                          \
      for (i = 0; i < (NC); i++) {                      \
         (IV)[i] *= F(i);                               \
         (B) -= (IV)[i] * (V0)[i];                      \
         (IV)[i] *= rd2;                                \
      }                                                 \
      (B) = (B) * rd2 + 0.5f;                           \
   } while (0)

/*
 * CALCCDOT(TEXEL, NV, NC, IV, B, V)
 *
 * Project colour V[0..NC-1] onto the interpolation vector IV with bias B
 * (as produced by MAKEIVEC) and store the resulting index in TEXEL.
 * With SAFECDOT enabled the index is clamped to [0, NV].
 *
 * The macro uses (and clobbers) a caller-provided integer variable `i'.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)   \
   do {                                     \
      GLfloat dot = 0.0F;                   \
      for (i = 0; i < (NC); i++) {          \
         dot += (V)[i] * (IV)[i];           \
      }                                     \
      (TEXEL) = (GLint)(dot + (B));         \
      if (SAFECDOT) {                       \
         if ((TEXEL) < 0) {                 \
            (TEXEL) = 0;                    \
         } else if ((TEXEL) > (NV)) {       \
            (TEXEL) = (NV);                 \
         }                                  \
      }                                     \
   } while (0)
249
 
250
 
251
static GLint
252
fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
253
              GLubyte input[MAX_COMP], GLint nc)
254
{
255
   GLint i, j, best = -1;
256
   GLfloat err = 1e9; /* big enough */
257
 
258
   for (j = 0; j < nv; j++) {
259
      GLfloat e = 0.0F;
260
      for (i = 0; i < nc; i++) {
261
         e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
262
      }
263
      if (e < err) {
264
         err = e;
265
         best = j;
266
      }
267
   }
268
 
269
   return best;
270
}
271
 
272
 
273
static GLint
274
fxt1_worst (GLfloat vec[MAX_COMP],
275
            GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
276
{
277
   GLint i, k, worst = -1;
278
   GLfloat err = -1.0F; /* small enough */
279
 
280
   for (k = 0; k < n; k++) {
281
      GLfloat e = 0.0F;
282
      for (i = 0; i < nc; i++) {
283
         e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
284
      }
285
      if (e > err) {
286
         err = e;
287
         worst = k;
288
      }
289
   }
290
 
291
   return worst;
292
}
293
 
294
 
295
static GLint
296
fxt1_variance (GLdouble variance[MAX_COMP],
297
               GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
298
{
299
   GLint i, k, best = 0;
300
   GLint sx, sx2;
301
   GLdouble var, maxvar = -1; /* small enough */
302
   GLdouble teenth = 1.0 / n;
303
 
304
   for (i = 0; i < nc; i++) {
305
      sx = sx2 = 0;
306
      for (k = 0; k < n; k++) {
307
         GLint t = input[k][i];
308
         sx += t;
309
         sx2 += t * t;
310
      }
311
      var = sx2 * teenth - sx * sx * teenth * teenth;
312
      if (maxvar < var) {
313
         maxvar = var;
314
         best = i;
315
      }
316
      if (variance) {
317
         variance[i] = var;
318
      }
319
   }
320
 
321
   return best;
322
}
323
 
324
 
325
static GLint
326
fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
327
             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
328
{
329
#if 0
330
   /* Choose colors from a grid.
331
    */
332
   GLint i, j;
333
 
334
   for (j = 0; j < nv; j++) {
335
      GLint m = j * (n - 1) / (nv - 1);
336
      for (i = 0; i < nc; i++) {
337
         vec[j][i] = input[m][i];
338
      }
339
   }
340
#else
341
   /* Our solution here is to find the darkest and brightest colors in
342
    * the 8x4 tile and use those as the two representative colors.
343
    * There are probably better algorithms to use (histogram-based).
344
    */
345
   GLint i, j, k;
346
   GLint minSum = 2000; /* big enough */
347
   GLint maxSum = -1; /* small enough */
348
   GLint minCol = 0; /* phoudoin: silent compiler! */
349
   GLint maxCol = 0; /* phoudoin: silent compiler! */
350
 
351
   struct {
352
      GLint flag;
353
      GLint key;
354
      GLint freq;
355
      GLint idx;
356
   } hist[N_TEXELS];
357
   GLint lenh = 0;
358
 
359
   memset(hist, 0, sizeof(hist));
360
 
361
   for (k = 0; k < n; k++) {
362
      GLint l;
363
      GLint key = 0;
364
      GLint sum = 0;
365
      for (i = 0; i < nc; i++) {
366
         key <<= 8;
367
         key |= input[k][i];
368
         sum += input[k][i];
369
      }
370
      for (l = 0; l < n; l++) {
371
         if (!hist[l].flag) {
372
            /* alloc new slot */
373
            hist[l].flag = !0;
374
            hist[l].key = key;
375
            hist[l].freq = 1;
376
            hist[l].idx = k;
377
            lenh = l + 1;
378
            break;
379
         } else if (hist[l].key == key) {
380
            hist[l].freq++;
381
            break;
382
         }
383
      }
384
      if (minSum > sum) {
385
         minSum = sum;
386
         minCol = k;
387
      }
388
      if (maxSum < sum) {
389
         maxSum = sum;
390
         maxCol = k;
391
      }
392
   }
393
 
394
   if (lenh <= nv) {
395
      for (j = 0; j < lenh; j++) {
396
         for (i = 0; i < nc; i++) {
397
            vec[j][i] = (GLfloat)input[hist[j].idx][i];
398
         }
399
      }
400
      for (; j < nv; j++) {
401
         for (i = 0; i < nc; i++) {
402
            vec[j][i] = vec[0][i];
403
         }
404
      }
405
      return 0;
406
   }
407
 
408
   for (j = 0; j < nv; j++) {
409
      for (i = 0; i < nc; i++) {
410
         vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
411
      }
412
   }
413
#endif
414
 
415
   return !0;
416
}
417
 
418
 
419
static GLint
420
fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
421
            GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
422
{
423
   /* Use the generalized lloyd's algorithm for VQ:
424
    *     find 4 color vectors.
425
    *
426
    *     for each sample color
427
    *         sort to nearest vector.
428
    *
429
    *     replace each vector with the centroid of its matching colors.
430
    *
431
    *     repeat until RMS doesn't improve.
432
    *
433
    *     if a color vector has no samples, or becomes the same as another
434
    *     vector, replace it with the color which is farthest from a sample.
435
    *
436
    * vec[][MAX_COMP]           initial vectors and resulting colors
437
    * nv                        number of resulting colors required
438
    * input[N_TEXELS][MAX_COMP] input texels
439
    * nc                        number of components in input / vec
440
    * n                         number of input samples
441
    */
442
 
443
   GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
444
   GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
445
   GLfloat error, lasterror = 1e9;
446
 
447
   GLint i, j, k, rep;
448
 
449
   /* the quantizer */
450
   for (rep = 0; rep < LL_N_REP; rep++) {
451
      /* reset sums & counters */
452
      for (j = 0; j < nv; j++) {
453
         for (i = 0; i < nc; i++) {
454
            sum[j][i] = 0;
455
         }
456
         cnt[j] = 0;
457
      }
458
      error = 0;
459
 
460
      /* scan whole block */
461
      for (k = 0; k < n; k++) {
462
#if 1
463
         GLint best = -1;
464
         GLfloat err = 1e9; /* big enough */
465
         /* determine best vector */
466
         for (j = 0; j < nv; j++) {
467
            GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
468
                      (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
469
                      (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
470
            if (nc == 4) {
471
               e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
472
            }
473
            if (e < err) {
474
               err = e;
475
               best = j;
476
            }
477
         }
478
#else
479
         GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
480
#endif
481
         assert(best >= 0);
482
         /* add in closest color */
483
         for (i = 0; i < nc; i++) {
484
            sum[best][i] += input[k][i];
485
         }
486
         /* mark this vector as used */
487
         cnt[best]++;
488
         /* accumulate error */
489
         error += err;
490
      }
491
 
492
      /* check RMS */
493
      if ((error < LL_RMS_E) ||
494
          ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
495
         return !0; /* good match */
496
      }
497
      lasterror = error;
498
 
499
      /* move each vector to the barycenter of its closest colors */
500
      for (j = 0; j < nv; j++) {
501
         if (cnt[j]) {
502
            GLfloat div = 1.0F / cnt[j];
503
            for (i = 0; i < nc; i++) {
504
               vec[j][i] = div * sum[j][i];
505
            }
506
         } else {
507
            /* this vec has no samples or is identical with a previous vec */
508
            GLint worst = fxt1_worst(vec[j], input, nc, n);
509
            for (i = 0; i < nc; i++) {
510
               vec[j][i] = input[worst][i];
511
            }
512
         }
513
      }
514
   }
515
 
516
   return 0; /* could not converge fast enough */
517
}
518
 
519
 
520
static void
521
fxt1_quantize_CHROMA (GLuint *cc,
522
                      GLubyte input[N_TEXELS][MAX_COMP])
523
{
524
   const GLint n_vect = 4; /* 4 base vectors to find */
525
   const GLint n_comp = 3; /* 3 components: R, G, B */
526
   GLfloat vec[MAX_VECT][MAX_COMP];
527
   GLint i, j, k;
528
   Fx64 hi; /* high quadword */
529
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
530
 
531
   if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
532
      fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
533
   }
534
 
535
   FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
536
   for (j = n_vect - 1; j >= 0; j--) {
537
      for (i = 0; i < n_comp; i++) {
538
         /* add in colors */
539
         FX64_SHL(hi, 5);
540
         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
541
      }
542
   }
543
   ((Fx64 *)cc)[1] = hi;
544
 
545
   lohi = lolo = 0;
546
   /* right microtile */
547
   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
548
      lohi <<= 2;
549
      lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
550
   }
551
   /* left microtile */
552
   for (; k >= 0; k--) {
553
      lolo <<= 2;
554
      lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
555
   }
556
   cc[1] = lohi;
557
   cc[0] = lolo;
558
}
559
 
560
 
561
static void
562
fxt1_quantize_ALPHA0 (GLuint *cc,
563
                      GLubyte input[N_TEXELS][MAX_COMP],
564
                      GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
565
{
566
   const GLint n_vect = 3; /* 3 base vectors to find */
567
   const GLint n_comp = 4; /* 4 components: R, G, B, A */
568
   GLfloat vec[MAX_VECT][MAX_COMP];
569
   GLint i, j, k;
570
   Fx64 hi; /* high quadword */
571
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
572
 
573
   /* the last vector indicates zero */
574
   for (i = 0; i < n_comp; i++) {
575
      vec[n_vect][i] = 0;
576
   }
577
 
578
   /* the first n texels in reord are guaranteed to be non-zero */
579
   if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
580
      fxt1_lloyd(vec, n_vect, reord, n_comp, n);
581
   }
582
 
583
   FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
584
   for (j = n_vect - 1; j >= 0; j--) {
585
      /* add in alphas */
586
      FX64_SHL(hi, 5);
587
      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
588
   }
589
   for (j = n_vect - 1; j >= 0; j--) {
590
      for (i = 0; i < n_comp - 1; i++) {
591
         /* add in colors */
592
         FX64_SHL(hi, 5);
593
         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
594
      }
595
   }
596
   ((Fx64 *)cc)[1] = hi;
597
 
598
   lohi = lolo = 0;
599
   /* right microtile */
600
   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
601
      lohi <<= 2;
602
      lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
603
   }
604
   /* left microtile */
605
   for (; k >= 0; k--) {
606
      lolo <<= 2;
607
      lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
608
   }
609
   cc[1] = lohi;
610
   cc[0] = lolo;
611
}
612
 
613
 
614
static void
615
fxt1_quantize_ALPHA1 (GLuint *cc,
616
                      GLubyte input[N_TEXELS][MAX_COMP])
617
{
618
   const GLint n_vect = 3; /* highest vector number in each microtile */
619
   const GLint n_comp = 4; /* 4 components: R, G, B, A */
620
   GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
621
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
622
   GLint i, j, k;
623
   Fx64 hi; /* high quadword */
624
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
625
 
626
   GLint minSum;
627
   GLint maxSum;
628
   GLint minColL = 0, maxColL = 0;
629
   GLint minColR = 0, maxColR = 0;
630
   GLint sumL = 0, sumR = 0;
631
   GLint nn_comp;
632
   /* Our solution here is to find the darkest and brightest colors in
633
    * the 4x4 tile and use those as the two representative colors.
634
    * There are probably better algorithms to use (histogram-based).
635
    */
636
   nn_comp = n_comp;
637
   while ((minColL == maxColL) && nn_comp) {
638
       minSum = 2000; /* big enough */
639
       maxSum = -1; /* small enough */
640
       for (k = 0; k < N_TEXELS / 2; k++) {
641
           GLint sum = 0;
642
           for (i = 0; i < nn_comp; i++) {
643
               sum += input[k][i];
644
           }
645
           if (minSum > sum) {
646
               minSum = sum;
647
               minColL = k;
648
           }
649
           if (maxSum < sum) {
650
               maxSum = sum;
651
               maxColL = k;
652
           }
653
           sumL += sum;
654
       }
655
 
656
       nn_comp--;
657
   }
658
 
659
   nn_comp = n_comp;
660
   while ((minColR == maxColR) && nn_comp) {
661
       minSum = 2000; /* big enough */
662
       maxSum = -1; /* small enough */
663
       for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
664
           GLint sum = 0;
665
           for (i = 0; i < nn_comp; i++) {
666
               sum += input[k][i];
667
           }
668
           if (minSum > sum) {
669
               minSum = sum;
670
               minColR = k;
671
           }
672
           if (maxSum < sum) {
673
               maxSum = sum;
674
               maxColR = k;
675
           }
676
           sumR += sum;
677
       }
678
 
679
       nn_comp--;
680
   }
681
 
682
   /* choose the common vector (yuck!) */
683
   {
684
      GLint j1, j2;
685
      GLint v1 = 0, v2 = 0;
686
      GLfloat err = 1e9; /* big enough */
687
      GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
688
      for (i = 0; i < n_comp; i++) {
689
         tv[0][i] = input[minColL][i];
690
         tv[1][i] = input[maxColL][i];
691
         tv[2][i] = input[minColR][i];
692
         tv[3][i] = input[maxColR][i];
693
      }
694
      for (j1 = 0; j1 < 2; j1++) {
695
         for (j2 = 2; j2 < 4; j2++) {
696
            GLfloat e = 0.0F;
697
            for (i = 0; i < n_comp; i++) {
698
               e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
699
            }
700
            if (e < err) {
701
               err = e;
702
               v1 = j1;
703
               v2 = j2;
704
            }
705
         }
706
      }
707
      for (i = 0; i < n_comp; i++) {
708
         vec[0][i] = tv[1 - v1][i];
709
         vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
710
         vec[2][i] = tv[5 - v2][i];
711
      }
712
   }
713
 
714
   /* left microtile */
715
   cc[0] = 0;
716
   if (minColL != maxColL) {
717
      /* compute interpolation vector */
718
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
719
 
720
      /* add in texels */
721
      lolo = 0;
722
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
723
         GLint texel;
724
         /* interpolate color */
725
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
726
         /* add in texel */
727
         lolo <<= 2;
728
         lolo |= texel;
729
      }
730
 
731
      cc[0] = lolo;
732
   }
733
 
734
   /* right microtile */
735
   cc[1] = 0;
736
   if (minColR != maxColR) {
737
      /* compute interpolation vector */
738
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
739
 
740
      /* add in texels */
741
      lohi = 0;
742
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
743
         GLint texel;
744
         /* interpolate color */
745
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
746
         /* add in texel */
747
         lohi <<= 2;
748
         lohi |= texel;
749
      }
750
 
751
      cc[1] = lohi;
752
   }
753
 
754
   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
755
   for (j = n_vect - 1; j >= 0; j--) {
756
      /* add in alphas */
757
      FX64_SHL(hi, 5);
758
      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
759
   }
760
   for (j = n_vect - 1; j >= 0; j--) {
761
      for (i = 0; i < n_comp - 1; i++) {
762
         /* add in colors */
763
         FX64_SHL(hi, 5);
764
         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
765
      }
766
   }
767
   ((Fx64 *)cc)[1] = hi;
768
}
769
 
770
 
771
static void
772
fxt1_quantize_HI (GLuint *cc,
773
                  GLubyte input[N_TEXELS][MAX_COMP],
774
                  GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
775
{
776
   const GLint n_vect = 6; /* highest vector number */
777
   const GLint n_comp = 3; /* 3 components: R, G, B */
778
   GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
779
   GLfloat iv[MAX_COMP];   /* interpolation vector */
780
   GLint i, k;
781
   GLuint hihi; /* high quadword: hi dword */
782
 
783
   GLint minSum = 2000; /* big enough */
784
   GLint maxSum = -1; /* small enough */
785
   GLint minCol = 0; /* phoudoin: silent compiler! */
786
   GLint maxCol = 0; /* phoudoin: silent compiler! */
787
 
788
   /* Our solution here is to find the darkest and brightest colors in
789
    * the 8x4 tile and use those as the two representative colors.
790
    * There are probably better algorithms to use (histogram-based).
791
    */
792
   for (k = 0; k < n; k++) {
793
      GLint sum = 0;
794
      for (i = 0; i < n_comp; i++) {
795
         sum += reord[k][i];
796
      }
797
      if (minSum > sum) {
798
         minSum = sum;
799
         minCol = k;
800
      }
801
      if (maxSum < sum) {
802
         maxSum = sum;
803
         maxCol = k;
804
      }
805
   }
806
 
807
   hihi = 0; /* cc-hi = "00" */
808
   for (i = 0; i < n_comp; i++) {
809
      /* add in colors */
810
      hihi <<= 5;
811
      hihi |= reord[maxCol][i] >> 3;
812
   }
813
   for (i = 0; i < n_comp; i++) {
814
      /* add in colors */
815
      hihi <<= 5;
816
      hihi |= reord[minCol][i] >> 3;
817
   }
818
   cc[3] = hihi;
819
   cc[0] = cc[1] = cc[2] = 0;
820
 
821
   /* compute interpolation vector */
822
   if (minCol != maxCol) {
823
      MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
824
   }
825
 
826
   /* add in texels */
827
   for (k = N_TEXELS - 1; k >= 0; k--) {
828
      GLint t = k * 3;
829
      GLuint *kk = (GLuint *)((char *)cc + t / 8);
830
      GLint texel = n_vect + 1; /* transparent black */
831
 
832
      if (!ISTBLACK(input[k])) {
833
         if (minCol != maxCol) {
834
            /* interpolate color */
835
            CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
836
            /* add in texel */
837
            kk[0] |= texel << (t & 7);
838
         }
839
      } else {
840
         /* add in texel */
841
         kk[0] |= texel << (t & 7);
842
      }
843
   }
844
}
845
 
846
 
847
static void
848
fxt1_quantize_MIXED1 (GLuint *cc,
849
                      GLubyte input[N_TEXELS][MAX_COMP])
850
{
851
   const GLint n_vect = 2; /* highest vector number in each microtile */
852
   const GLint n_comp = 3; /* 3 components: R, G, B */
853
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
854
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
855
   GLint i, j, k;
856
   Fx64 hi; /* high quadword */
857
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
858
 
859
   GLint minSum;
860
   GLint maxSum;
861
   GLint minColL = 0, maxColL = -1;
862
   GLint minColR = 0, maxColR = -1;
863
 
864
   /* Our solution here is to find the darkest and brightest colors in
865
    * the 4x4 tile and use those as the two representative colors.
866
    * There are probably better algorithms to use (histogram-based).
867
    */
868
   minSum = 2000; /* big enough */
869
   maxSum = -1; /* small enough */
870
   for (k = 0; k < N_TEXELS / 2; k++) {
871
      if (!ISTBLACK(input[k])) {
872
         GLint sum = 0;
873
         for (i = 0; i < n_comp; i++) {
874
            sum += input[k][i];
875
         }
876
         if (minSum > sum) {
877
            minSum = sum;
878
            minColL = k;
879
         }
880
         if (maxSum < sum) {
881
            maxSum = sum;
882
            maxColL = k;
883
         }
884
      }
885
   }
886
   minSum = 2000; /* big enough */
887
   maxSum = -1; /* small enough */
888
   for (; k < N_TEXELS; k++) {
889
      if (!ISTBLACK(input[k])) {
890
         GLint sum = 0;
891
         for (i = 0; i < n_comp; i++) {
892
            sum += input[k][i];
893
         }
894
         if (minSum > sum) {
895
            minSum = sum;
896
            minColR = k;
897
         }
898
         if (maxSum < sum) {
899
            maxSum = sum;
900
            maxColR = k;
901
         }
902
      }
903
   }
904
 
905
   /* left microtile */
906
   if (maxColL == -1) {
907
      /* all transparent black */
908
      cc[0] = ~0u;
909
      for (i = 0; i < n_comp; i++) {
910
         vec[0][i] = 0;
911
         vec[1][i] = 0;
912
      }
913
   } else {
914
      cc[0] = 0;
915
      for (i = 0; i < n_comp; i++) {
916
         vec[0][i] = input[minColL][i];
917
         vec[1][i] = input[maxColL][i];
918
      }
919
      if (minColL != maxColL) {
920
         /* compute interpolation vector */
921
         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
922
 
923
         /* add in texels */
924
         lolo = 0;
925
         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
926
            GLint texel = n_vect + 1; /* transparent black */
927
            if (!ISTBLACK(input[k])) {
928
               /* interpolate color */
929
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
930
            }
931
            /* add in texel */
932
            lolo <<= 2;
933
            lolo |= texel;
934
         }
935
         cc[0] = lolo;
936
      }
937
   }
938
 
939
   /* right microtile */
940
   if (maxColR == -1) {
941
      /* all transparent black */
942
      cc[1] = ~0u;
943
      for (i = 0; i < n_comp; i++) {
944
         vec[2][i] = 0;
945
         vec[3][i] = 0;
946
      }
947
   } else {
948
      cc[1] = 0;
949
      for (i = 0; i < n_comp; i++) {
950
         vec[2][i] = input[minColR][i];
951
         vec[3][i] = input[maxColR][i];
952
      }
953
      if (minColR != maxColR) {
954
         /* compute interpolation vector */
955
         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
956
 
957
         /* add in texels */
958
         lohi = 0;
959
         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
960
            GLint texel = n_vect + 1; /* transparent black */
961
            if (!ISTBLACK(input[k])) {
962
               /* interpolate color */
963
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
964
            }
965
            /* add in texel */
966
            lohi <<= 2;
967
            lohi |= texel;
968
         }
969
         cc[1] = lohi;
970
      }
971
   }
972
 
973
   FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
974
   for (j = 2 * 2 - 1; j >= 0; j--) {
975
      for (i = 0; i < n_comp; i++) {
976
         /* add in colors */
977
         FX64_SHL(hi, 5);
978
         FX64_OR32(hi, vec[j][i] >> 3);
979
      }
980
   }
981
   ((Fx64 *)cc)[1] = hi;
982
}
983
 
984
 
985
static void
986
fxt1_quantize_MIXED0 (GLuint *cc,
987
                      GLubyte input[N_TEXELS][MAX_COMP])
988
{
989
   const GLint n_vect = 3; /* highest vector number in each microtile */
990
   const GLint n_comp = 3; /* 3 components: R, G, B */
991
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
992
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
993
   GLint i, j, k;
994
   Fx64 hi; /* high quadword */
995
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
996
 
997
   GLint minColL = 0, maxColL = 0;
998
   GLint minColR = 0, maxColR = 0;
999
#if 0
1000
   GLint minSum;
1001
   GLint maxSum;
1002
 
1003
   /* Our solution here is to find the darkest and brightest colors in
1004
    * the 4x4 tile and use those as the two representative colors.
1005
    * There are probably better algorithms to use (histogram-based).
1006
    */
1007
   minSum = 2000; /* big enough */
1008
   maxSum = -1; /* small enough */
1009
   for (k = 0; k < N_TEXELS / 2; k++) {
1010
      GLint sum = 0;
1011
      for (i = 0; i < n_comp; i++) {
1012
         sum += input[k][i];
1013
      }
1014
      if (minSum > sum) {
1015
         minSum = sum;
1016
         minColL = k;
1017
      }
1018
      if (maxSum < sum) {
1019
         maxSum = sum;
1020
         maxColL = k;
1021
      }
1022
   }
1023
   minSum = 2000; /* big enough */
1024
   maxSum = -1; /* small enough */
1025
   for (; k < N_TEXELS; k++) {
1026
      GLint sum = 0;
1027
      for (i = 0; i < n_comp; i++) {
1028
         sum += input[k][i];
1029
      }
1030
      if (minSum > sum) {
1031
         minSum = sum;
1032
         minColR = k;
1033
      }
1034
      if (maxSum < sum) {
1035
         maxSum = sum;
1036
         maxColR = k;
1037
      }
1038
   }
1039
#else
1040
   GLint minVal;
1041
   GLint maxVal;
1042
   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1043
   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1044
 
1045
   /* Scan the channel with max variance for lo & hi
1046
    * and use those as the two representative colors.
1047
    */
1048
   minVal = 2000; /* big enough */
1049
   maxVal = -1; /* small enough */
1050
   for (k = 0; k < N_TEXELS / 2; k++) {
1051
      GLint t = input[k][maxVarL];
1052
      if (minVal > t) {
1053
         minVal = t;
1054
         minColL = k;
1055
      }
1056
      if (maxVal < t) {
1057
         maxVal = t;
1058
         maxColL = k;
1059
      }
1060
   }
1061
   minVal = 2000; /* big enough */
1062
   maxVal = -1; /* small enough */
1063
   for (; k < N_TEXELS; k++) {
1064
      GLint t = input[k][maxVarR];
1065
      if (minVal > t) {
1066
         minVal = t;
1067
         minColR = k;
1068
      }
1069
      if (maxVal < t) {
1070
         maxVal = t;
1071
         maxColR = k;
1072
      }
1073
   }
1074
#endif
1075
 
1076
   /* left microtile */
1077
   cc[0] = 0;
1078
   for (i = 0; i < n_comp; i++) {
1079
      vec[0][i] = input[minColL][i];
1080
      vec[1][i] = input[maxColL][i];
1081
   }
1082
   if (minColL != maxColL) {
1083
      /* compute interpolation vector */
1084
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1085
 
1086
      /* add in texels */
1087
      lolo = 0;
1088
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1089
         GLint texel;
1090
         /* interpolate color */
1091
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1092
         /* add in texel */
1093
         lolo <<= 2;
1094
         lolo |= texel;
1095
      }
1096
 
1097
      /* funky encoding for LSB of green */
1098
      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1099
         for (i = 0; i < n_comp; i++) {
1100
            vec[1][i] = input[minColL][i];
1101
            vec[0][i] = input[maxColL][i];
1102
         }
1103
         lolo = ~lolo;
1104
      }
1105
 
1106
      cc[0] = lolo;
1107
   }
1108
 
1109
   /* right microtile */
1110
   cc[1] = 0;
1111
   for (i = 0; i < n_comp; i++) {
1112
      vec[2][i] = input[minColR][i];
1113
      vec[3][i] = input[maxColR][i];
1114
   }
1115
   if (minColR != maxColR) {
1116
      /* compute interpolation vector */
1117
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1118
 
1119
      /* add in texels */
1120
      lohi = 0;
1121
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1122
         GLint texel;
1123
         /* interpolate color */
1124
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1125
         /* add in texel */
1126
         lohi <<= 2;
1127
         lohi |= texel;
1128
      }
1129
 
1130
      /* funky encoding for LSB of green */
1131
      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1132
         for (i = 0; i < n_comp; i++) {
1133
            vec[3][i] = input[minColR][i];
1134
            vec[2][i] = input[maxColR][i];
1135
         }
1136
         lohi = ~lohi;
1137
      }
1138
 
1139
      cc[1] = lohi;
1140
   }
1141
 
1142
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1143
   for (j = 2 * 2 - 1; j >= 0; j--) {
1144
      for (i = 0; i < n_comp; i++) {
1145
         /* add in colors */
1146
         FX64_SHL(hi, 5);
1147
         FX64_OR32(hi, vec[j][i] >> 3);
1148
      }
1149
   }
1150
   ((Fx64 *)cc)[1] = hi;
1151
}
1152
 
1153
 
1154
static void
1155
fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1156
{
1157
   GLint trualpha;
1158
   GLubyte reord[N_TEXELS][MAX_COMP];
1159
 
1160
   GLubyte input[N_TEXELS][MAX_COMP];
1161
   GLint i, k, l;
1162
 
1163
   if (comps == 3) {
1164
      /* make the whole block opaque */
1165
      memset(input, -1, sizeof(input));
1166
   }
1167
 
1168
   /* 8 texels each line */
1169
   for (l = 0; l < 4; l++) {
1170
      for (k = 0; k < 4; k++) {
1171
         for (i = 0; i < comps; i++) {
1172
            input[k + l * 4][i] = *lines[l]++;
1173
         }
1174
      }
1175
      for (; k < 8; k++) {
1176
         for (i = 0; i < comps; i++) {
1177
            input[k + l * 4 + 12][i] = *lines[l]++;
1178
         }
1179
      }
1180
   }
1181
 
1182
   /* block layout:
1183
    * 00, 01, 02, 03, 08, 09, 0a, 0b
1184
    * 10, 11, 12, 13, 18, 19, 1a, 1b
1185
    * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1186
    * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1187
    */
1188
 
1189
   /* [dBorca]
1190
    * stupidity flows forth from this
1191
    */
1192
   l = N_TEXELS;
1193
   trualpha = 0;
1194
   if (comps == 4) {
1195
      /* skip all transparent black texels */
1196
      l = 0;
1197
      for (k = 0; k < N_TEXELS; k++) {
1198
         /* test all components against 0 */
1199
         if (!ISTBLACK(input[k])) {
1200
            /* texel is not transparent black */
1201
            COPY_4UBV(reord[l], input[k]);
1202
            if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1203
               /* non-opaque texel */
1204
               trualpha = !0;
1205
            }
1206
            l++;
1207
         }
1208
      }
1209
   }
1210
 
1211
#if 0
1212
   if (trualpha) {
1213
      fxt1_quantize_ALPHA0(cc, input, reord, l);
1214
   } else if (l == 0) {
1215
      cc[0] = cc[1] = cc[2] = -1;
1216
      cc[3] = 0;
1217
   } else if (l < N_TEXELS) {
1218
      fxt1_quantize_HI(cc, input, reord, l);
1219
   } else {
1220
      fxt1_quantize_CHROMA(cc, input);
1221
   }
1222
   (void)fxt1_quantize_ALPHA1;
1223
   (void)fxt1_quantize_MIXED1;
1224
   (void)fxt1_quantize_MIXED0;
1225
#else
1226
   if (trualpha) {
1227
      fxt1_quantize_ALPHA1(cc, input);
1228
   } else if (l == 0) {
1229
      cc[0] = cc[1] = cc[2] = ~0u;
1230
      cc[3] = 0;
1231
   } else if (l < N_TEXELS) {
1232
      fxt1_quantize_MIXED1(cc, input);
1233
   } else {
1234
      fxt1_quantize_MIXED0(cc, input);
1235
   }
1236
   (void)fxt1_quantize_ALPHA0;
1237
   (void)fxt1_quantize_HI;
1238
   (void)fxt1_quantize_CHROMA;
1239
#endif
1240
}
1241
 
1242
 
1243
 
1244
/**
1245
 * Upscale an image by replication, not (typical) stretching.
1246
 * We use this when the image width or height is less than a
1247
 * certain size (4, 8) and we need to upscale an image.
1248
 */
1249
static void
1250
upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1251
                   GLsizei outWidth, GLsizei outHeight,
1252
                   GLint comps, const GLubyte *src, GLint srcRowStride,
1253
                   GLubyte *dest )
1254
{
1255
   GLint i, j, k;
1256
 
1257
   ASSERT(outWidth >= inWidth);
1258
   ASSERT(outHeight >= inHeight);
1259
#if 0
1260
   ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1261
   ASSERT((outWidth & 3) == 0);
1262
   ASSERT((outHeight & 3) == 0);
1263
#endif
1264
 
1265
   for (i = 0; i < outHeight; i++) {
1266
      const GLint ii = i % inHeight;
1267
      for (j = 0; j < outWidth; j++) {
1268
         const GLint jj = j % inWidth;
1269
         for (k = 0; k < comps; k++) {
1270
            dest[(i * outWidth + j) * comps + k]
1271
               = src[ii * srcRowStride + jj * comps + k];
1272
         }
1273
      }
1274
   }
1275
}
1276
 
1277
 
1278
static void
1279
fxt1_encode (GLuint width, GLuint height, GLint comps,
1280
             const void *source, GLint srcRowStride,
1281
             void *dest, GLint destRowStride)
1282
{
1283
   GLuint x, y;
1284
   const GLubyte *data;
1285
   GLuint *encoded = (GLuint *)dest;
1286
   void *newSource = NULL;
1287
 
1288
   assert(comps == 3 || comps == 4);
1289
 
1290
   /* Replicate image if width is not M8 or height is not M4 */
1291
   if ((width & 7) | (height & 3)) {
1292
      GLint newWidth = (width + 7) & ~7;
1293
      GLint newHeight = (height + 3) & ~3;
1294
      newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1295
      if (!newSource) {
1296
         GET_CURRENT_CONTEXT(ctx);
1297
         _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1298
         goto cleanUp;
1299
      }
1300
      upscale_teximage2d(width, height, newWidth, newHeight,
1301
                         comps, (const GLubyte *) source,
1302
                         srcRowStride, (GLubyte *) newSource);
1303
      source = newSource;
1304
      width = newWidth;
1305
      height = newHeight;
1306
      srcRowStride = comps * newWidth;
1307
   }
1308
 
1309
   data = (const GLubyte *) source;
1310
   destRowStride = (destRowStride - width * 2) / 4;
1311
   for (y = 0; y < height; y += 4) {
1312
      GLuint offs = 0 + (y + 0) * srcRowStride;
1313
      for (x = 0; x < width; x += 8) {
1314
         const GLubyte *lines[4];
1315
         lines[0] = &data[offs];
1316
         lines[1] = lines[0] + srcRowStride;
1317
         lines[2] = lines[1] + srcRowStride;
1318
         lines[3] = lines[2] + srcRowStride;
1319
         offs += 8 * comps;
1320
         fxt1_quantize(encoded, lines, comps);
1321
         /* 128 bits per 8x4 block */
1322
         encoded += 4;
1323
      }
1324
      encoded += destRowStride;
1325
   }
1326
 
1327
 cleanUp:
1328
   free(newSource);
1329
}
1330
 
1331
 
1332
/***************************************************************************\
1333
 * FXT1 decoder
1334
 *
1335
 * The decoder is based on GL_3DFX_texture_compression_FXT1
1336
 * specification and serves as a concept for the encoder.
1337
\***************************************************************************/
1338
 
1339
 
1340
/* lookup table for scaling 5 bit colors up to 8 bits */
1341
static const GLubyte _rgb_scale_5[] = {
1342
   0,   8,   16,  25,  33,  41,  49,  58,
1343
   66,  74,  82,  90,  99,  107, 115, 123,
1344
   132, 140, 148, 156, 165, 173, 181, 189,
1345
   197, 206, 214, 222, 230, 239, 247, 255
1346
};
1347
 
1348
/* lookup table for scaling 6 bit colors up to 8 bits */
1349
static const GLubyte _rgb_scale_6[] = {
1350
   0,   4,   8,   12,  16,  20,  24,  28,
1351
   32,  36,  40,  45,  49,  53,  57,  61,
1352
   65,  69,  73,  77,  81,  85,  89,  93,
1353
   97,  101, 105, 109, 113, 117, 121, 125,
1354
   130, 134, 138, 142, 146, 150, 154, 158,
1355
   162, 166, 170, 174, 178, 182, 186, 190,
1356
   194, 198, 202, 206, 210, 215, 219, 223,
1357
   227, 231, 235, 239, 243, 247, 251, 255
1358
};
1359
 
1360
 
1361
/* CC_SEL: view `cc` as an array of 32-bit words and return the word that
 * holds bit number `which`, shifted right so that bit sits at position 0.
 * Bits above the wanted field are NOT masked off; callers mask the result.
 * The cast keeps const so that read-only block pointers can be passed.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))

/* UP5: expand the low 5 bits of `c` to an 8-bit value. */
#define UP5(c) _rgb_scale_5[(c) & 31]

/* UP6: expand a 6-bit value built from the low 5 bits of `c` and the low
 * bit of `b` (used as least significant bit) to an 8-bit value.
 */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]

/* LERP: rounded integer interpolation, step `t` of `n` from c0 to c1.
 * The whole expansion is parenthesized so it can be used as an operand
 * inside a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1365
 
1366
 
1367
static void
1368
fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1369
{
1370
   const GLuint *cc;
1371
 
1372
   t *= 3;
1373
   cc = (const GLuint *)(code + t / 8);
1374
   t = (cc[0] >> (t & 7)) & 7;
1375
 
1376
   if (t == 7) {
1377
      rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1378
   } else {
1379
      GLubyte r, g, b;
1380
      cc = (const GLuint *)(code + 12);
1381
      if (t == 0) {
1382
         b = UP5(CC_SEL(cc, 0));
1383
         g = UP5(CC_SEL(cc, 5));
1384
         r = UP5(CC_SEL(cc, 10));
1385
      } else if (t == 6) {
1386
         b = UP5(CC_SEL(cc, 15));
1387
         g = UP5(CC_SEL(cc, 20));
1388
         r = UP5(CC_SEL(cc, 25));
1389
      } else {
1390
         b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1391
         g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1392
         r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1393
      }
1394
      rgba[RCOMP] = r;
1395
      rgba[GCOMP] = g;
1396
      rgba[BCOMP] = b;
1397
      rgba[ACOMP] = 255;
1398
   }
1399
}
1400
 
1401
 
1402
static void
1403
fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1404
{
1405
   const GLuint *cc;
1406
   GLuint kk;
1407
 
1408
   cc = (const GLuint *)code;
1409
   if (t & 16) {
1410
      cc++;
1411
      t &= 15;
1412
   }
1413
   t = (cc[0] >> (t * 2)) & 3;
1414
 
1415
   t *= 15;
1416
   cc = (const GLuint *)(code + 8 + t / 8);
1417
   kk = cc[0] >> (t & 7);
1418
   rgba[BCOMP] = UP5(kk);
1419
   rgba[GCOMP] = UP5(kk >> 5);
1420
   rgba[RCOMP] = UP5(kk >> 10);
1421
   rgba[ACOMP] = 255;
1422
}
1423
 
1424
 
1425
static void
1426
fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1427
{
1428
   const GLuint *cc;
1429
   GLuint col[2][3];
1430
   GLint glsb, selb;
1431
 
1432
   cc = (const GLuint *)code;
1433
   if (t & 16) {
1434
      t &= 15;
1435
      t = (cc[1] >> (t * 2)) & 3;
1436
      /* col 2 */
1437
      col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1438
      col[0][GCOMP] = CC_SEL(cc, 99);
1439
      col[0][RCOMP] = CC_SEL(cc, 104);
1440
      /* col 3 */
1441
      col[1][BCOMP] = CC_SEL(cc, 109);
1442
      col[1][GCOMP] = CC_SEL(cc, 114);
1443
      col[1][RCOMP] = CC_SEL(cc, 119);
1444
      glsb = CC_SEL(cc, 126);
1445
      selb = CC_SEL(cc, 33);
1446
   } else {
1447
      t = (cc[0] >> (t * 2)) & 3;
1448
      /* col 0 */
1449
      col[0][BCOMP] = CC_SEL(cc, 64);
1450
      col[0][GCOMP] = CC_SEL(cc, 69);
1451
      col[0][RCOMP] = CC_SEL(cc, 74);
1452
      /* col 1 */
1453
      col[1][BCOMP] = CC_SEL(cc, 79);
1454
      col[1][GCOMP] = CC_SEL(cc, 84);
1455
      col[1][RCOMP] = CC_SEL(cc, 89);
1456
      glsb = CC_SEL(cc, 125);
1457
      selb = CC_SEL(cc, 1);
1458
   }
1459
 
1460
   if (CC_SEL(cc, 124) & 1) {
1461
      /* alpha[0] == 1 */
1462
 
1463
      if (t == 3) {
1464
         /* zero */
1465
         rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1466
      } else {
1467
         GLubyte r, g, b;
1468
         if (t == 0) {
1469
            b = UP5(col[0][BCOMP]);
1470
            g = UP5(col[0][GCOMP]);
1471
            r = UP5(col[0][RCOMP]);
1472
         } else if (t == 2) {
1473
            b = UP5(col[1][BCOMP]);
1474
            g = UP6(col[1][GCOMP], glsb);
1475
            r = UP5(col[1][RCOMP]);
1476
         } else {
1477
            b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1478
            g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1479
            r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1480
         }
1481
         rgba[RCOMP] = r;
1482
         rgba[GCOMP] = g;
1483
         rgba[BCOMP] = b;
1484
         rgba[ACOMP] = 255;
1485
      }
1486
   } else {
1487
      /* alpha[0] == 0 */
1488
      GLubyte r, g, b;
1489
      if (t == 0) {
1490
         b = UP5(col[0][BCOMP]);
1491
         g = UP6(col[0][GCOMP], glsb ^ selb);
1492
         r = UP5(col[0][RCOMP]);
1493
      } else if (t == 3) {
1494
         b = UP5(col[1][BCOMP]);
1495
         g = UP6(col[1][GCOMP], glsb);
1496
         r = UP5(col[1][RCOMP]);
1497
      } else {
1498
         b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1499
         g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1500
                        UP6(col[1][GCOMP], glsb));
1501
         r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1502
      }
1503
      rgba[RCOMP] = r;
1504
      rgba[GCOMP] = g;
1505
      rgba[BCOMP] = b;
1506
      rgba[ACOMP] = 255;
1507
   }
1508
}
1509
 
1510
 
1511
static void
1512
fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1513
{
1514
   const GLuint *cc;
1515
   GLubyte r, g, b, a;
1516
 
1517
   cc = (const GLuint *)code;
1518
   if (CC_SEL(cc, 124) & 1) {
1519
      /* lerp == 1 */
1520
      GLuint col0[4];
1521
 
1522
      if (t & 16) {
1523
         t &= 15;
1524
         t = (cc[1] >> (t * 2)) & 3;
1525
         /* col 2 */
1526
         col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1527
         col0[GCOMP] = CC_SEL(cc, 99);
1528
         col0[RCOMP] = CC_SEL(cc, 104);
1529
         col0[ACOMP] = CC_SEL(cc, 119);
1530
      } else {
1531
         t = (cc[0] >> (t * 2)) & 3;
1532
         /* col 0 */
1533
         col0[BCOMP] = CC_SEL(cc, 64);
1534
         col0[GCOMP] = CC_SEL(cc, 69);
1535
         col0[RCOMP] = CC_SEL(cc, 74);
1536
         col0[ACOMP] = CC_SEL(cc, 109);
1537
      }
1538
 
1539
      if (t == 0) {
1540
         b = UP5(col0[BCOMP]);
1541
         g = UP5(col0[GCOMP]);
1542
         r = UP5(col0[RCOMP]);
1543
         a = UP5(col0[ACOMP]);
1544
      } else if (t == 3) {
1545
         b = UP5(CC_SEL(cc, 79));
1546
         g = UP5(CC_SEL(cc, 84));
1547
         r = UP5(CC_SEL(cc, 89));
1548
         a = UP5(CC_SEL(cc, 114));
1549
      } else {
1550
         b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1551
         g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1552
         r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1553
         a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1554
      }
1555
   } else {
1556
      /* lerp == 0 */
1557
 
1558
      if (t & 16) {
1559
         cc++;
1560
         t &= 15;
1561
      }
1562
      t = (cc[0] >> (t * 2)) & 3;
1563
 
1564
      if (t == 3) {
1565
         /* zero */
1566
         r = g = b = a = 0;
1567
      } else {
1568
         GLuint kk;
1569
         cc = (const GLuint *)code;
1570
         a = UP5(cc[3] >> (t * 5 + 13));
1571
         t *= 15;
1572
         cc = (const GLuint *)(code + 8 + t / 8);
1573
         kk = cc[0] >> (t & 7);
1574
         b = UP5(kk);
1575
         g = UP5(kk >> 5);
1576
         r = UP5(kk >> 10);
1577
      }
1578
   }
1579
   rgba[RCOMP] = r;
1580
   rgba[GCOMP] = g;
1581
   rgba[BCOMP] = b;
1582
   rgba[ACOMP] = a;
1583
}
1584
 
1585
 
1586
static void
1587
fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1588
               GLint i, GLint j, GLubyte *rgba)
1589
{
1590
   static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1591
      fxt1_decode_1HI,     /* cc-high   = "00?" */
1592
      fxt1_decode_1HI,     /* cc-high   = "00?" */
1593
      fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1594
      fxt1_decode_1ALPHA,  /* alpha     = "011" */
1595
      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1596
      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1597
      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1598
      fxt1_decode_1MIXED   /* mixed     = "1??" */
1599
   };
1600
 
1601
   const GLubyte *code = (const GLubyte *)texture +
1602
                         ((j / 4) * (stride / 8) + (i / 8)) * 16;
1603
   GLint mode = CC_SEL(code, 125);
1604
   GLint t = i & 7;
1605
 
1606
   if (t & 4) {
1607
      t += 12;
1608
   }
1609
   t += (j & 3) * 4;
1610
 
1611
   decode_1[mode](code, t, rgba);
1612
}
1613
 
1614
 
1615
 
1616
 
1617
static void
1618
fetch_rgb_fxt1(const GLubyte *map,
1619
               GLint rowStride, GLint i, GLint j, GLfloat *texel)
1620
{
1621
   GLubyte rgba[4];
1622
   fxt1_decode_1(map, rowStride, i, j, rgba);
1623
   texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1624
   texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1625
   texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1626
   texel[ACOMP] = 1.0F;
1627
}
1628
 
1629
 
1630
static void
1631
fetch_rgba_fxt1(const GLubyte *map,
1632
                GLint rowStride, GLint i, GLint j, GLfloat *texel)
1633
{
1634
   GLubyte rgba[4];
1635
   fxt1_decode_1(map, rowStride, i, j, rgba);
1636
   texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1637
   texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1638
   texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1639
   texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1640
}
1641
 
1642
 
1643
compressed_fetch_func
1644
_mesa_get_fxt_fetch_func(gl_format format)
1645
{
1646
   switch (format) {
1647
   case MESA_FORMAT_RGB_FXT1:
1648
      return fetch_rgb_fxt1;
1649
   case MESA_FORMAT_RGBA_FXT1:
1650
      return fetch_rgba_fxt1;
1651
   default:
1652
      return NULL;
1653
   }
1654
}