WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/Mesa/src/mesa/main/texcompress_fxt1.c

Rev	Author	Line No.	Line
4358	Serge	1	/*
		2	* Mesa 3-D graphics library
		3	*
		4	* Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
		5	*
		6	* Permission is hereby granted, free of charge, to any person obtaining a
		7	* copy of this software and associated documentation files (the "Software"),
		8	* to deal in the Software without restriction, including without limitation
		9	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		10	* and/or sell copies of the Software, and to permit persons to whom the
		11	* Software is furnished to do so, subject to the following conditions:
		12	*
		13	* The above copyright notice and this permission notice shall be included
		14	* in all copies or substantial portions of the Software.
		15	*
		16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
		17	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		19	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
		20	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
		21	* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
		22	* OTHER DEALINGS IN THE SOFTWARE.
		23	*/
		24
		25
		26	/**
		27	* \file texcompress_fxt1.c
		28	* GL_3DFX_texture_compression_FXT1 support.
		29	*/
		30
		31
		32	#include "glheader.h"
		33	#include "imports.h"
		34	#include "colormac.h"
		35	#include "image.h"
		36	#include "macros.h"
		37	#include "mipmap.h"
		38	#include "texcompress.h"
		39	#include "texcompress_fxt1.h"
		40	#include "texstore.h"
		41
		42
		43	static void
		44	fxt1_encode (GLuint width, GLuint height, GLint comps,
		45	const void *source, GLint srcRowStride,
		46	void *dest, GLint destRowStride);
		47
		48	static void
		49	fxt1_decode_1 (const void *texture, GLint stride,
		50	GLint i, GLint j, GLubyte *rgba);
		51
		52
		53	/**
		54	* Store user's image in rgb_fxt1 format.
		55	*/
		56	GLboolean
		57	_mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
		58	{
		59	const GLubyte *pixels;
		60	GLint srcRowStride;
		61	GLubyte *dst;
		62	const GLubyte *tempImage = NULL;
		63
		64	ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
		65
		66	if (srcFormat != GL_RGB \|\|
		67	srcType != GL_UNSIGNED_BYTE \|\|
		68	ctx->_ImageTransferState \|\|
		69	srcPacking->RowLength != srcWidth \|\|
		70	srcPacking->SwapBytes) {
		71	/* convert image to RGB/GLubyte */
		72	tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
		73	baseInternalFormat,
		74	_mesa_get_format_base_format(dstFormat),
		75	srcWidth, srcHeight, srcDepth,
		76	srcFormat, srcType, srcAddr,
		77	srcPacking);
		78	if (!tempImage)
		79	return GL_FALSE; /* out of memory */
		80	pixels = tempImage;
		81	srcRowStride = 3 * srcWidth;
		82	srcFormat = GL_RGB;
		83	}
		84	else {
		85	pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
		86	srcFormat, srcType, 0, 0);
		87
		88	srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
		89	srcType) / sizeof(GLubyte);
		90	}
		91
		92	dst = dstSlices[0];
		93
		94	fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
		95	dst, dstRowStride);
		96
		97	free((void*) tempImage);
		98
		99	return GL_TRUE;
		100	}
		101
		102
		103	/**
		104	* Store user's image in rgba_fxt1 format.
		105	*/
		106	GLboolean
		107	_mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
		108	{
		109	const GLubyte *pixels;
		110	GLint srcRowStride;
		111	GLubyte *dst;
		112	const GLubyte *tempImage = NULL;
		113
		114	ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
		115
		116	if (srcFormat != GL_RGBA \|\|
		117	srcType != GL_UNSIGNED_BYTE \|\|
		118	ctx->_ImageTransferState \|\|
		119	srcPacking->SwapBytes) {
		120	/* convert image to RGBA/GLubyte */
		121	tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
		122	baseInternalFormat,
		123	_mesa_get_format_base_format(dstFormat),
		124	srcWidth, srcHeight, srcDepth,
		125	srcFormat, srcType, srcAddr,
		126	srcPacking);
		127	if (!tempImage)
		128	return GL_FALSE; /* out of memory */
		129	pixels = tempImage;
		130	srcRowStride = 4 * srcWidth;
		131	srcFormat = GL_RGBA;
		132	}
		133	else {
		134	pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
		135	srcFormat, srcType, 0, 0);
		136
		137	srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
		138	srcType) / sizeof(GLubyte);
		139	}
		140
		141	dst = dstSlices[0];
		142
		143	fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
		144	dst, dstRowStride);
		145
		146	free((void*) tempImage);
		147
		148	return GL_TRUE;
		149	}
		150
		151
		152	/***************************************************************************\
		153	* FXT1 encoder
		154	*
		155	* The encoder was built by reversing the decoder,
		156	* and is vaguely based on Texus2 by 3dfx. Note that this code
		157	* is merely a proof of concept, since it is highly UNoptimized;
		158	* moreover, it is sub-optimal due to initial conditions passed
		159	* to Lloyd's algorithm (the interpolation modes are even worse).
		160	\***************************************************************************/
		161
		162
		163	#define MAX_COMP 4 /* ever needed maximum number of components in texel */
		164	#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
		165	#define N_TEXELS 32 /* number of texels in a block (always 32) */
		166	#define LL_N_REP 50 /* number of iterations in lloyd's vq */
		167	#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
		168	#define LL_RMS_E 255 /* fault tolerance (maximum error) */
		169	#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
		170	#define ISTBLACK(v) (((GLuint )(v)) == 0)
		171
		172
		173	/*
		174	* Define a 64-bit unsigned integer type and macros
		175	*/
		176	#if 1
		177
		178	#define FX64_NATIVE 1
		179
		180	typedef uint64_t Fx64;
		181
		182	#define FX64_MOV32(a, b) a = b
		183	#define FX64_OR32(a, b) a \|= b
		184	#define FX64_SHL(a, c) a <<= c
		185
		186	#else
		187
		188	#define FX64_NATIVE 0
		189
		190	typedef struct {
		191	GLuint lo, hi;
		192	} Fx64;
		193
		194	#define FX64_MOV32(a, b) a.lo = b
		195	#define FX64_OR32(a, b) a.lo \|= b
		196
		197	#define FX64_SHL(a, c) \
		198	do { \
		199	if ((c) >= 32) { \
		200	a.hi = a.lo << ((c) - 32); \
		201	a.lo = 0; \
		202	} else { \
		203	a.hi = (a.hi << (c)) \| (a.lo >> (32 - (c))); \
		204	a.lo <<= (c); \
		205	} \
		206	} while (0)
		207
		208	#endif
		209
		210
		211	#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
		212	#define SAFECDOT 1 /* for paranoids */
		213
		214	#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
		215	do { \
		216	/* compute interpolation vector */ \
		217	GLfloat d2 = 0.0F; \
		218	GLfloat rd2; \
		219	\
		220	for (i = 0; i < NC; i++) { \
		221	IV[i] = (V1[i] - V0[i]) * F(i); \
		222	d2 += IV[i] * IV[i]; \
		223	} \
		224	rd2 = (GLfloat)NV / d2; \
		225	B = 0; \
		226	for (i = 0; i < NC; i++) { \
		227	IV[i] *= F(i); \
		228	B -= IV[i] * V0[i]; \
		229	IV[i] *= rd2; \
		230	} \
		231	B = B * rd2 + 0.5f; \
		232	} while (0)
		233
		234	#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
		235	do { \
		236	GLfloat dot = 0.0F; \
		237	for (i = 0; i < NC; i++) { \
		238	dot += V[i] * IV[i]; \
		239	} \
		240	TEXEL = (GLint)(dot + B); \
		241	if (SAFECDOT) { \
		242	if (TEXEL < 0) { \
		243	TEXEL = 0; \
		244	} else if (TEXEL > NV) { \
		245	TEXEL = NV; \
		246	} \
		247	} \
		248	} while (0)
		249
		250
		251	static GLint
		252	fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
		253	GLubyte input[MAX_COMP], GLint nc)
		254	{
		255	GLint i, j, best = -1;
		256	GLfloat err = 1e9; /* big enough */
		257
		258	for (j = 0; j < nv; j++) {
		259	GLfloat e = 0.0F;
		260	for (i = 0; i < nc; i++) {
		261	e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
		262	}
		263	if (e < err) {
		264	err = e;
		265	best = j;
		266	}
		267	}
		268
		269	return best;
		270	}
		271
		272
		273	static GLint
		274	fxt1_worst (GLfloat vec[MAX_COMP],
		275	GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
		276	{
		277	GLint i, k, worst = -1;
		278	GLfloat err = -1.0F; /* small enough */
		279
		280	for (k = 0; k < n; k++) {
		281	GLfloat e = 0.0F;
		282	for (i = 0; i < nc; i++) {
		283	e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
		284	}
		285	if (e > err) {
		286	err = e;
		287	worst = k;
		288	}
		289	}
		290
		291	return worst;
		292	}
		293
		294
		295	static GLint
		296	fxt1_variance (GLdouble variance[MAX_COMP],
		297	GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
		298	{
		299	GLint i, k, best = 0;
		300	GLint sx, sx2;
		301	GLdouble var, maxvar = -1; /* small enough */
		302	GLdouble teenth = 1.0 / n;
		303
		304	for (i = 0; i < nc; i++) {
		305	sx = sx2 = 0;
		306	for (k = 0; k < n; k++) {
		307	GLint t = input[k][i];
		308	sx += t;
		309	sx2 += t * t;
		310	}
		311	var = sx2 * teenth - sx * sx * teenth * teenth;
		312	if (maxvar < var) {
		313	maxvar = var;
		314	best = i;
		315	}
		316	if (variance) {
		317	variance[i] = var;
		318	}
		319	}
		320
		321	return best;
		322	}
		323
		324
		325	static GLint
		326	fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
		327	GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
		328	{
		329	#if 0
		330	/* Choose colors from a grid.
		331	*/
		332	GLint i, j;
		333
		334	for (j = 0; j < nv; j++) {
		335	GLint m = j * (n - 1) / (nv - 1);
		336	for (i = 0; i < nc; i++) {
		337	vec[j][i] = input[m][i];
		338	}
		339	}
		340	#else
		341	/* Our solution here is to find the darkest and brightest colors in
		342	* the 8x4 tile and use those as the two representative colors.
		343	* There are probably better algorithms to use (histogram-based).
		344	*/
		345	GLint i, j, k;
		346	GLint minSum = 2000; /* big enough */
		347	GLint maxSum = -1; /* small enough */
		348	GLint minCol = 0; /* phoudoin: silent compiler! */
		349	GLint maxCol = 0; /* phoudoin: silent compiler! */
		350
		351	struct {
		352	GLint flag;
		353	GLint key;
		354	GLint freq;
		355	GLint idx;
		356	} hist[N_TEXELS];
		357	GLint lenh = 0;
		358
		359	memset(hist, 0, sizeof(hist));
		360
		361	for (k = 0; k < n; k++) {
		362	GLint l;
		363	GLint key = 0;
		364	GLint sum = 0;
		365	for (i = 0; i < nc; i++) {
		366	key <<= 8;
		367	key \|= input[k][i];
		368	sum += input[k][i];
		369	}
		370	for (l = 0; l < n; l++) {
		371	if (!hist[l].flag) {
		372	/* alloc new slot */
		373	hist[l].flag = !0;
		374	hist[l].key = key;
		375	hist[l].freq = 1;
		376	hist[l].idx = k;
		377	lenh = l + 1;
		378	break;
		379	} else if (hist[l].key == key) {
		380	hist[l].freq++;
		381	break;
		382	}
		383	}
		384	if (minSum > sum) {
		385	minSum = sum;
		386	minCol = k;
		387	}
		388	if (maxSum < sum) {
		389	maxSum = sum;
		390	maxCol = k;
		391	}
		392	}
		393
		394	if (lenh <= nv) {
		395	for (j = 0; j < lenh; j++) {
		396	for (i = 0; i < nc; i++) {
		397	vec[j][i] = (GLfloat)input[hist[j].idx][i];
		398	}
		399	}
		400	for (; j < nv; j++) {
		401	for (i = 0; i < nc; i++) {
		402	vec[j][i] = vec[0][i];
		403	}
		404	}
		405	return 0;
		406	}
		407
		408	for (j = 0; j < nv; j++) {
		409	for (i = 0; i < nc; i++) {
		410	vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
		411	}
		412	}
		413	#endif
		414
		415	return !0;
		416	}
		417
		418
		419	static GLint
		420	fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
		421	GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
		422	{
		423	/* Use the generalized lloyd's algorithm for VQ:
		424	* find 4 color vectors.
		425	*
		426	* for each sample color
		427	* sort to nearest vector.
		428	*
		429	* replace each vector with the centroid of its matching colors.
		430	*
		431	* repeat until RMS doesn't improve.
		432	*
		433	* if a color vector has no samples, or becomes the same as another
		434	* vector, replace it with the color which is farthest from a sample.
		435	*
		436	* vec[][MAX_COMP] initial vectors and resulting colors
		437	* nv number of resulting colors required
		438	* input[N_TEXELS][MAX_COMP] input texels
		439	* nc number of components in input / vec
		440	* n number of input samples
		441	*/
		442
		443	GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
		444	GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
		445	GLfloat error, lasterror = 1e9;
		446
		447	GLint i, j, k, rep;
		448
		449	/* the quantizer */
		450	for (rep = 0; rep < LL_N_REP; rep++) {
		451	/* reset sums & counters */
		452	for (j = 0; j < nv; j++) {
		453	for (i = 0; i < nc; i++) {
		454	sum[j][i] = 0;
		455	}
		456	cnt[j] = 0;
		457	}
		458	error = 0;
		459
		460	/* scan whole block */
		461	for (k = 0; k < n; k++) {
		462	#if 1
		463	GLint best = -1;
		464	GLfloat err = 1e9; /* big enough */
		465	/* determine best vector */
		466	for (j = 0; j < nv; j++) {
		467	GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
		468	(vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
		469	(vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
		470	if (nc == 4) {
		471	e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
		472	}
		473	if (e < err) {
		474	err = e;
		475	best = j;
		476	}
		477	}
		478	#else
		479	GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
		480	#endif
		481	assert(best >= 0);
		482	/* add in closest color */
		483	for (i = 0; i < nc; i++) {
		484	sum[best][i] += input[k][i];
		485	}
		486	/* mark this vector as used */
		487	cnt[best]++;
		488	/* accumulate error */
		489	error += err;
		490	}
		491
		492	/* check RMS */
		493	if ((error < LL_RMS_E) \|\|
		494	((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
		495	return !0; /* good match */
		496	}
		497	lasterror = error;
		498
		499	/* move each vector to the barycenter of its closest colors */
		500	for (j = 0; j < nv; j++) {
		501	if (cnt[j]) {
		502	GLfloat div = 1.0F / cnt[j];
		503	for (i = 0; i < nc; i++) {
		504	vec[j][i] = div * sum[j][i];
		505	}
		506	} else {
		507	/* this vec has no samples or is identical with a previous vec */
		508	GLint worst = fxt1_worst(vec[j], input, nc, n);
		509	for (i = 0; i < nc; i++) {
		510	vec[j][i] = input[worst][i];
		511	}
		512	}
		513	}
		514	}
		515
		516	return 0; /* could not converge fast enough */
		517	}
		518
		519
		520	static void
		521	fxt1_quantize_CHROMA (GLuint *cc,
		522	GLubyte input[N_TEXELS][MAX_COMP])
		523	{
		524	const GLint n_vect = 4; /* 4 base vectors to find */
		525	const GLint n_comp = 3; /* 3 components: R, G, B */
		526	GLfloat vec[MAX_VECT][MAX_COMP];
		527	GLint i, j, k;
		528	Fx64 hi; /* high quadword */
		529	GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
		530
		531	if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
		532	fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
		533	}
		534
		535	FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
		536	for (j = n_vect - 1; j >= 0; j--) {
		537	for (i = 0; i < n_comp; i++) {
		538	/* add in colors */
		539	FX64_SHL(hi, 5);
		540	FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
		541	}
		542	}
		543	((Fx64 *)cc)[1] = hi;
		544
		545	lohi = lolo = 0;
		546	/* right microtile */
		547	for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
		548	lohi <<= 2;
		549	lohi \|= fxt1_bestcol(vec, n_vect, input[k], n_comp);
		550	}
		551	/* left microtile */
		552	for (; k >= 0; k--) {
		553	lolo <<= 2;
		554	lolo \|= fxt1_bestcol(vec, n_vect, input[k], n_comp);
		555	}
		556	cc[1] = lohi;
		557	cc[0] = lolo;
		558	}
		559
		560
		561	static void
		562	fxt1_quantize_ALPHA0 (GLuint *cc,
		563	GLubyte input[N_TEXELS][MAX_COMP],
		564	GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
		565	{
		566	const GLint n_vect = 3; /* 3 base vectors to find */
		567	const GLint n_comp = 4; /* 4 components: R, G, B, A */
		568	GLfloat vec[MAX_VECT][MAX_COMP];
		569	GLint i, j, k;
		570	Fx64 hi; /* high quadword */
		571	GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
		572
		573	/* the last vector indicates zero */
		574	for (i = 0; i < n_comp; i++) {
		575	vec[n_vect][i] = 0;
		576	}
		577
		578	/* the first n texels in reord are guaranteed to be non-zero */
		579	if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
		580	fxt1_lloyd(vec, n_vect, reord, n_comp, n);
		581	}
		582
		583	FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
		584	for (j = n_vect - 1; j >= 0; j--) {
		585	/* add in alphas */
		586	FX64_SHL(hi, 5);
		587	FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
		588	}
		589	for (j = n_vect - 1; j >= 0; j--) {
		590	for (i = 0; i < n_comp - 1; i++) {
		591	/* add in colors */
		592	FX64_SHL(hi, 5);
		593	FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
		594	}
		595	}
		596	((Fx64 *)cc)[1] = hi;
		597
		598	lohi = lolo = 0;
		599	/* right microtile */
		600	for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
		601	lohi <<= 2;
		602	lohi \|= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
		603	}
		604	/* left microtile */
		605	for (; k >= 0; k--) {
		606	lolo <<= 2;
		607	lolo \|= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
		608	}
		609	cc[1] = lohi;
		610	cc[0] = lolo;
		611	}
		612
		613
		614	static void
		615	fxt1_quantize_ALPHA1 (GLuint *cc,
		616	GLubyte input[N_TEXELS][MAX_COMP])
		617	{
		618	const GLint n_vect = 3; /* highest vector number in each microtile */
		619	const GLint n_comp = 4; /* 4 components: R, G, B, A */
		620	GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
		621	GLfloat b, iv[MAX_COMP]; /* interpolation vector */
		622	GLint i, j, k;
		623	Fx64 hi; /* high quadword */
		624	GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
		625
		626	GLint minSum;
		627	GLint maxSum;
		628	GLint minColL = 0, maxColL = 0;
		629	GLint minColR = 0, maxColR = 0;
		630	GLint sumL = 0, sumR = 0;
		631	GLint nn_comp;
		632	/* Our solution here is to find the darkest and brightest colors in
		633	* the 4x4 tile and use those as the two representative colors.
		634	* There are probably better algorithms to use (histogram-based).
		635	*/
		636	nn_comp = n_comp;
		637	while ((minColL == maxColL) && nn_comp) {
		638	minSum = 2000; /* big enough */
		639	maxSum = -1; /* small enough */
		640	for (k = 0; k < N_TEXELS / 2; k++) {
		641	GLint sum = 0;
		642	for (i = 0; i < nn_comp; i++) {
		643	sum += input[k][i];
		644	}
		645	if (minSum > sum) {
		646	minSum = sum;
		647	minColL = k;
		648	}
		649	if (maxSum < sum) {
		650	maxSum = sum;
		651	maxColL = k;
		652	}
		653	sumL += sum;
		654	}
		655
		656	nn_comp--;
		657	}
		658
		659	nn_comp = n_comp;
		660	while ((minColR == maxColR) && nn_comp) {
		661	minSum = 2000; /* big enough */
		662	maxSum = -1; /* small enough */
		663	for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
		664	GLint sum = 0;
		665	for (i = 0; i < nn_comp; i++) {
		666	sum += input[k][i];
		667	}
		668	if (minSum > sum) {
		669	minSum = sum;
		670	minColR = k;
		671	}
		672	if (maxSum < sum) {
		673	maxSum = sum;
		674	maxColR = k;
		675	}
		676	sumR += sum;
		677	}
		678
		679	nn_comp--;
		680	}
		681
		682	/* choose the common vector (yuck!) */
		683	{
		684	GLint j1, j2;
		685	GLint v1 = 0, v2 = 0;
		686	GLfloat err = 1e9; /* big enough */
		687	GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
		688	for (i = 0; i < n_comp; i++) {
		689	tv[0][i] = input[minColL][i];
		690	tv[1][i] = input[maxColL][i];
		691	tv[2][i] = input[minColR][i];
		692	tv[3][i] = input[maxColR][i];
		693	}
		694	for (j1 = 0; j1 < 2; j1++) {
		695	for (j2 = 2; j2 < 4; j2++) {
		696	GLfloat e = 0.0F;
		697	for (i = 0; i < n_comp; i++) {
		698	e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
		699	}
		700	if (e < err) {
		701	err = e;
		702	v1 = j1;
		703	v2 = j2;
		704	}
		705	}
		706	}
		707	for (i = 0; i < n_comp; i++) {
		708	vec[0][i] = tv[1 - v1][i];
		709	vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
		710	vec[2][i] = tv[5 - v2][i];
		711	}
		712	}
		713
		714	/* left microtile */
		715	cc[0] = 0;
		716	if (minColL != maxColL) {
		717	/* compute interpolation vector */
		718	MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
		719
		720	/* add in texels */
		721	lolo = 0;
		722	for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
		723	GLint texel;
		724	/* interpolate color */
		725	CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
		726	/* add in texel */
		727	lolo <<= 2;
		728	lolo \|= texel;
		729	}
		730
		731	cc[0] = lolo;
		732	}
		733
		734	/* right microtile */
		735	cc[1] = 0;
		736	if (minColR != maxColR) {
		737	/* compute interpolation vector */
		738	MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
		739
		740	/* add in texels */
		741	lohi = 0;
		742	for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
		743	GLint texel;
		744	/* interpolate color */
		745	CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
		746	/* add in texel */
		747	lohi <<= 2;
		748	lohi \|= texel;
		749	}
		750
		751	cc[1] = lohi;
		752	}
		753
		754	FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
		755	for (j = n_vect - 1; j >= 0; j--) {
		756	/* add in alphas */
		757	FX64_SHL(hi, 5);
		758	FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
		759	}
		760	for (j = n_vect - 1; j >= 0; j--) {
		761	for (i = 0; i < n_comp - 1; i++) {
		762	/* add in colors */
		763	FX64_SHL(hi, 5);
		764	FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
		765	}
		766	}
		767	((Fx64 *)cc)[1] = hi;
		768	}
		769
		770
		771	static void
		772	fxt1_quantize_HI (GLuint *cc,
		773	GLubyte input[N_TEXELS][MAX_COMP],
		774	GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
		775	{
		776	const GLint n_vect = 6; /* highest vector number */
		777	const GLint n_comp = 3; /* 3 components: R, G, B */
		778	GLfloat b = 0.0F; /* phoudoin: silent compiler! */
		779	GLfloat iv[MAX_COMP]; /* interpolation vector */
		780	GLint i, k;
		781	GLuint hihi; /* high quadword: hi dword */
		782
		783	GLint minSum = 2000; /* big enough */
		784	GLint maxSum = -1; /* small enough */
		785	GLint minCol = 0; /* phoudoin: silent compiler! */
		786	GLint maxCol = 0; /* phoudoin: silent compiler! */
		787
		788	/* Our solution here is to find the darkest and brightest colors in
		789	* the 8x4 tile and use those as the two representative colors.
		790	* There are probably better algorithms to use (histogram-based).
		791	*/
		792	for (k = 0; k < n; k++) {
		793	GLint sum = 0;
		794	for (i = 0; i < n_comp; i++) {
		795	sum += reord[k][i];
		796	}
		797	if (minSum > sum) {
		798	minSum = sum;
		799	minCol = k;
		800	}
		801	if (maxSum < sum) {
		802	maxSum = sum;
		803	maxCol = k;
		804	}
		805	}
		806
		807	hihi = 0; /* cc-hi = "00" */
		808	for (i = 0; i < n_comp; i++) {
		809	/* add in colors */
		810	hihi <<= 5;
		811	hihi \|= reord[maxCol][i] >> 3;
		812	}
		813	for (i = 0; i < n_comp; i++) {
		814	/* add in colors */
		815	hihi <<= 5;
		816	hihi \|= reord[minCol][i] >> 3;
		817	}
		818	cc[3] = hihi;
		819	cc[0] = cc[1] = cc[2] = 0;
		820
		821	/* compute interpolation vector */
		822	if (minCol != maxCol) {
		823	MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
		824	}
		825
		826	/* add in texels */
		827	for (k = N_TEXELS - 1; k >= 0; k--) {
		828	GLint t = k * 3;
		829	GLuint kk = (GLuint )((char *)cc + t / 8);
		830	GLint texel = n_vect + 1; /* transparent black */
		831
		832	if (!ISTBLACK(input[k])) {
		833	if (minCol != maxCol) {
		834	/* interpolate color */
		835	CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
		836	/* add in texel */
		837	kk[0] \|= texel << (t & 7);
		838	}
		839	} else {
		840	/* add in texel */
		841	kk[0] \|= texel << (t & 7);
		842	}
		843	}
		844	}
		845
		846
		847	static void
		848	fxt1_quantize_MIXED1 (GLuint *cc,
		849	GLubyte input[N_TEXELS][MAX_COMP])
		850	{
		851	const GLint n_vect = 2; /* highest vector number in each microtile */
		852	const GLint n_comp = 3; /* 3 components: R, G, B */
		853	GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
		854	GLfloat b, iv[MAX_COMP]; /* interpolation vector */
		855	GLint i, j, k;
		856	Fx64 hi; /* high quadword */
		857	GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
		858
		859	GLint minSum;
		860	GLint maxSum;
		861	GLint minColL = 0, maxColL = -1;
		862	GLint minColR = 0, maxColR = -1;
		863
		864	/* Our solution here is to find the darkest and brightest colors in
		865	* the 4x4 tile and use those as the two representative colors.
		866	* There are probably better algorithms to use (histogram-based).
		867	*/
		868	minSum = 2000; /* big enough */
		869	maxSum = -1; /* small enough */
		870	for (k = 0; k < N_TEXELS / 2; k++) {
		871	if (!ISTBLACK(input[k])) {
		872	GLint sum = 0;
		873	for (i = 0; i < n_comp; i++) {
		874	sum += input[k][i];
		875	}
		876	if (minSum > sum) {
		877	minSum = sum;
		878	minColL = k;
		879	}
		880	if (maxSum < sum) {
		881	maxSum = sum;
		882	maxColL = k;
		883	}
		884	}
		885	}
		886	minSum = 2000; /* big enough */
		887	maxSum = -1; /* small enough */
		888	for (; k < N_TEXELS; k++) {
		889	if (!ISTBLACK(input[k])) {
		890	GLint sum = 0;
		891	for (i = 0; i < n_comp; i++) {
		892	sum += input[k][i];
		893	}
		894	if (minSum > sum) {
		895	minSum = sum;
		896	minColR = k;
		897	}
		898	if (maxSum < sum) {
		899	maxSum = sum;
		900	maxColR = k;
		901	}
		902	}
		903	}
		904
		905	/* left microtile */
		906	if (maxColL == -1) {
		907	/* all transparent black */
		908	cc[0] = ~0u;
		909	for (i = 0; i < n_comp; i++) {
		910	vec[0][i] = 0;
		911	vec[1][i] = 0;
		912	}
		913	} else {
		914	cc[0] = 0;
		915	for (i = 0; i < n_comp; i++) {
		916	vec[0][i] = input[minColL][i];
		917	vec[1][i] = input[maxColL][i];
		918	}
		919	if (minColL != maxColL) {
		920	/* compute interpolation vector */
		921	MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
		922
		923	/* add in texels */
		924	lolo = 0;
		925	for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
		926	GLint texel = n_vect + 1; /* transparent black */
		927	if (!ISTBLACK(input[k])) {
		928	/* interpolate color */
		929	CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
		930	}
		931	/* add in texel */
		932	lolo <<= 2;
		933	lolo \|= texel;
		934	}
		935	cc[0] = lolo;
		936	}
		937	}
		938
		939	/* right microtile */
		940	if (maxColR == -1) {
		941	/* all transparent black */
		942	cc[1] = ~0u;
		943	for (i = 0; i < n_comp; i++) {
		944	vec[2][i] = 0;
		945	vec[3][i] = 0;
		946	}
		947	} else {
		948	cc[1] = 0;
		949	for (i = 0; i < n_comp; i++) {
		950	vec[2][i] = input[minColR][i];
		951	vec[3][i] = input[maxColR][i];
		952	}
		953	if (minColR != maxColR) {
		954	/* compute interpolation vector */
		955	MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
		956
		957	/* add in texels */
		958	lohi = 0;
		959	for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
		960	GLint texel = n_vect + 1; /* transparent black */
		961	if (!ISTBLACK(input[k])) {
		962	/* interpolate color */
		963	CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
		964	}
		965	/* add in texel */
		966	lohi <<= 2;
		967	lohi \|= texel;
		968	}
		969	cc[1] = lohi;
		970	}
		971	}
		972
		973	FX64_MOV32(hi, 9 \| (vec[3][GCOMP] & 4) \| ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
		974	for (j = 2 * 2 - 1; j >= 0; j--) {
		975	for (i = 0; i < n_comp; i++) {
		976	/* add in colors */
		977	FX64_SHL(hi, 5);
		978	FX64_OR32(hi, vec[j][i] >> 3);
		979	}
		980	}
		981	((Fx64 *)cc)[1] = hi;
		982	}
		983
		984
		985	static void
		986	fxt1_quantize_MIXED0 (GLuint *cc,
		987	GLubyte input[N_TEXELS][MAX_COMP])
		988	{
		989	const GLint n_vect = 3; /* highest vector number in each microtile */
		990	const GLint n_comp = 3; /* 3 components: R, G, B */
		991	GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
		992	GLfloat b, iv[MAX_COMP]; /* interpolation vector */
		993	GLint i, j, k;
		994	Fx64 hi; /* high quadword */
		995	GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
		996
		997	GLint minColL = 0, maxColL = 0;
		998	GLint minColR = 0, maxColR = 0;
		999	#if 0
		1000	GLint minSum;
		1001	GLint maxSum;
		1002
		1003	/* Our solution here is to find the darkest and brightest colors in
		1004	* the 4x4 tile and use those as the two representative colors.
		1005	* There are probably better algorithms to use (histogram-based).
		1006	*/
		1007	minSum = 2000; /* big enough */
		1008	maxSum = -1; /* small enough */
		1009	for (k = 0; k < N_TEXELS / 2; k++) {
		1010	GLint sum = 0;
		1011	for (i = 0; i < n_comp; i++) {
		1012	sum += input[k][i];
		1013	}
		1014	if (minSum > sum) {
		1015	minSum = sum;
		1016	minColL = k;
		1017	}
		1018	if (maxSum < sum) {
		1019	maxSum = sum;
		1020	maxColL = k;
		1021	}
		1022	}
		1023	minSum = 2000; /* big enough */
		1024	maxSum = -1; /* small enough */
		1025	for (; k < N_TEXELS; k++) {
		1026	GLint sum = 0;
		1027	for (i = 0; i < n_comp; i++) {
		1028	sum += input[k][i];
		1029	}
		1030	if (minSum > sum) {
		1031	minSum = sum;
		1032	minColR = k;
		1033	}
		1034	if (maxSum < sum) {
		1035	maxSum = sum;
		1036	maxColR = k;
		1037	}
		1038	}
		1039	#else
		1040	GLint minVal;
		1041	GLint maxVal;
		1042	GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
		1043	GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
		1044
		1045	/* Scan the channel with max variance for lo & hi
		1046	* and use those as the two representative colors.
		1047	*/
		1048	minVal = 2000; /* big enough */
		1049	maxVal = -1; /* small enough */
		1050	for (k = 0; k < N_TEXELS / 2; k++) {
		1051	GLint t = input[k][maxVarL];
		1052	if (minVal > t) {
		1053	minVal = t;
		1054	minColL = k;
		1055	}
		1056	if (maxVal < t) {
		1057	maxVal = t;
		1058	maxColL = k;
		1059	}
		1060	}
		1061	minVal = 2000; /* big enough */
		1062	maxVal = -1; /* small enough */
		1063	for (; k < N_TEXELS; k++) {
		1064	GLint t = input[k][maxVarR];
		1065	if (minVal > t) {
		1066	minVal = t;
		1067	minColR = k;
		1068	}
		1069	if (maxVal < t) {
		1070	maxVal = t;
		1071	maxColR = k;
		1072	}
		1073	}
		1074	#endif
		1075
		1076	/* left microtile */
		1077	cc[0] = 0;
		1078	for (i = 0; i < n_comp; i++) {
		1079	vec[0][i] = input[minColL][i];
		1080	vec[1][i] = input[maxColL][i];
		1081	}
		1082	if (minColL != maxColL) {
		1083	/* compute interpolation vector */
		1084	MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
		1085
		1086	/* add in texels */
		1087	lolo = 0;
		1088	for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
		1089	GLint texel;
		1090	/* interpolate color */
		1091	CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
		1092	/* add in texel */
		1093	lolo <<= 2;
		1094	lolo \|= texel;
		1095	}
		1096
		1097	/* funky encoding for LSB of green */
		1098	if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
		1099	for (i = 0; i < n_comp; i++) {
		1100	vec[1][i] = input[minColL][i];
		1101	vec[0][i] = input[maxColL][i];
		1102	}
		1103	lolo = ~lolo;
		1104	}
		1105
		1106	cc[0] = lolo;
		1107	}
		1108
		1109	/* right microtile */
		1110	cc[1] = 0;
		1111	for (i = 0; i < n_comp; i++) {
		1112	vec[2][i] = input[minColR][i];
		1113	vec[3][i] = input[maxColR][i];
		1114	}
		1115	if (minColR != maxColR) {
		1116	/* compute interpolation vector */
		1117	MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
		1118
		1119	/* add in texels */
		1120	lohi = 0;
		1121	for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
		1122	GLint texel;
		1123	/* interpolate color */
		1124	CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
		1125	/* add in texel */
		1126	lohi <<= 2;
		1127	lohi \|= texel;
		1128	}
		1129
		1130	/* funky encoding for LSB of green */
		1131	if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
		1132	for (i = 0; i < n_comp; i++) {
		1133	vec[3][i] = input[minColR][i];
		1134	vec[2][i] = input[maxColR][i];
		1135	}
		1136	lohi = ~lohi;
		1137	}
		1138
		1139	cc[1] = lohi;
		1140	}
		1141
		1142	FX64_MOV32(hi, 8 \| (vec[3][GCOMP] & 4) \| ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
		1143	for (j = 2 * 2 - 1; j >= 0; j--) {
		1144	for (i = 0; i < n_comp; i++) {
		1145	/* add in colors */
		1146	FX64_SHL(hi, 5);
		1147	FX64_OR32(hi, vec[j][i] >> 3);
		1148	}
		1149	}
		1150	((Fx64 *)cc)[1] = hi;
		1151	}
		1152
		1153
		1154	static void
		1155	fxt1_quantize (GLuint cc, const GLubyte lines[], GLint comps)
		1156	{
		1157	GLint trualpha;
		1158	GLubyte reord[N_TEXELS][MAX_COMP];
		1159
		1160	GLubyte input[N_TEXELS][MAX_COMP];
		1161	GLint i, k, l;
		1162
		1163	if (comps == 3) {
		1164	/* make the whole block opaque */
		1165	memset(input, -1, sizeof(input));
		1166	}
		1167
		1168	/* 8 texels each line */
		1169	for (l = 0; l < 4; l++) {
		1170	for (k = 0; k < 4; k++) {
		1171	for (i = 0; i < comps; i++) {
		1172	input[k + l * 4][i] = *lines[l]++;
		1173	}
		1174	}
		1175	for (; k < 8; k++) {
		1176	for (i = 0; i < comps; i++) {
		1177	input[k + l * 4 + 12][i] = *lines[l]++;
		1178	}
		1179	}
		1180	}
		1181
		1182	/* block layout:
		1183	* 00, 01, 02, 03, 08, 09, 0a, 0b
		1184	* 10, 11, 12, 13, 18, 19, 1a, 1b
		1185	* 04, 05, 06, 07, 0c, 0d, 0e, 0f
		1186	* 14, 15, 16, 17, 1c, 1d, 1e, 1f
		1187	*/
		1188
		1189	/* [dBorca]
		1190	* stupidity flows forth from this
		1191	*/
		1192	l = N_TEXELS;
		1193	trualpha = 0;
		1194	if (comps == 4) {
		1195	/* skip all transparent black texels */
		1196	l = 0;
		1197	for (k = 0; k < N_TEXELS; k++) {
		1198	/* test all components against 0 */
		1199	if (!ISTBLACK(input[k])) {
		1200	/* texel is not transparent black */
		1201	COPY_4UBV(reord[l], input[k]);
		1202	if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
		1203	/* non-opaque texel */
		1204	trualpha = !0;
		1205	}
		1206	l++;
		1207	}
		1208	}
		1209	}
		1210
		1211	#if 0
		1212	if (trualpha) {
		1213	fxt1_quantize_ALPHA0(cc, input, reord, l);
		1214	} else if (l == 0) {
		1215	cc[0] = cc[1] = cc[2] = -1;
		1216	cc[3] = 0;
		1217	} else if (l < N_TEXELS) {
		1218	fxt1_quantize_HI(cc, input, reord, l);
		1219	} else {
		1220	fxt1_quantize_CHROMA(cc, input);
		1221	}
		1222	(void)fxt1_quantize_ALPHA1;
		1223	(void)fxt1_quantize_MIXED1;
		1224	(void)fxt1_quantize_MIXED0;
		1225	#else
		1226	if (trualpha) {
		1227	fxt1_quantize_ALPHA1(cc, input);
		1228	} else if (l == 0) {
		1229	cc[0] = cc[1] = cc[2] = ~0u;
		1230	cc[3] = 0;
		1231	} else if (l < N_TEXELS) {
		1232	fxt1_quantize_MIXED1(cc, input);
		1233	} else {
		1234	fxt1_quantize_MIXED0(cc, input);
		1235	}
		1236	(void)fxt1_quantize_ALPHA0;
		1237	(void)fxt1_quantize_HI;
		1238	(void)fxt1_quantize_CHROMA;
		1239	#endif
		1240	}
		1241
		1242
		1243
		1244	/**
		1245	* Upscale an image by replication, not (typical) stretching.
		1246	* We use this when the image width or height is less than a
		1247	* certain size (4, 8) and we need to upscale an image.
		1248	*/
		1249	static void
		1250	upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
		1251	GLsizei outWidth, GLsizei outHeight,
		1252	GLint comps, const GLubyte *src, GLint srcRowStride,
		1253	GLubyte *dest )
		1254	{
		1255	GLint i, j, k;
		1256
		1257	ASSERT(outWidth >= inWidth);
		1258	ASSERT(outHeight >= inHeight);
		1259	#if 0
		1260	ASSERT(inWidth == 1 \|\| inWidth == 2 \|\| inHeight == 1 \|\| inHeight == 2);
		1261	ASSERT((outWidth & 3) == 0);
		1262	ASSERT((outHeight & 3) == 0);
		1263	#endif
		1264
		1265	for (i = 0; i < outHeight; i++) {
		1266	const GLint ii = i % inHeight;
		1267	for (j = 0; j < outWidth; j++) {
		1268	const GLint jj = j % inWidth;
		1269	for (k = 0; k < comps; k++) {
		1270	dest[(i * outWidth + j) * comps + k]
		1271	= src[ii * srcRowStride + jj * comps + k];
		1272	}
		1273	}
		1274	}
		1275	}
		1276
		1277
		1278	static void
		1279	fxt1_encode (GLuint width, GLuint height, GLint comps,
		1280	const void *source, GLint srcRowStride,
		1281	void *dest, GLint destRowStride)
		1282	{
		1283	GLuint x, y;
		1284	const GLubyte *data;
		1285	GLuint encoded = (GLuint )dest;
		1286	void *newSource = NULL;
		1287
		1288	assert(comps == 3 \|\| comps == 4);
		1289
		1290	/* Replicate image if width is not M8 or height is not M4 */
		1291	if ((width & 7) \| (height & 3)) {
		1292	GLint newWidth = (width + 7) & ~7;
		1293	GLint newHeight = (height + 3) & ~3;
		1294	newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
		1295	if (!newSource) {
		1296	GET_CURRENT_CONTEXT(ctx);
		1297	_mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
		1298	goto cleanUp;
		1299	}
		1300	upscale_teximage2d(width, height, newWidth, newHeight,
		1301	comps, (const GLubyte *) source,
		1302	srcRowStride, (GLubyte *) newSource);
		1303	source = newSource;
		1304	width = newWidth;
		1305	height = newHeight;
		1306	srcRowStride = comps * newWidth;
		1307	}
		1308
		1309	data = (const GLubyte *) source;
		1310	destRowStride = (destRowStride - width * 2) / 4;
		1311	for (y = 0; y < height; y += 4) {
		1312	GLuint offs = 0 + (y + 0) * srcRowStride;
		1313	for (x = 0; x < width; x += 8) {
		1314	const GLubyte *lines[4];
		1315	lines[0] = &data[offs];
		1316	lines[1] = lines[0] + srcRowStride;
		1317	lines[2] = lines[1] + srcRowStride;
		1318	lines[3] = lines[2] + srcRowStride;
		1319	offs += 8 * comps;
		1320	fxt1_quantize(encoded, lines, comps);
		1321	/* 128 bits per 8x4 block */
		1322	encoded += 4;
		1323	}
		1324	encoded += destRowStride;
		1325	}
		1326
		1327	cleanUp:
		1328	free(newSource);
		1329	}
		1330
		1331
		1332	/***************************************************************************\
		1333	* FXT1 decoder
		1334	*
		1335	* The decoder is based on GL_3DFX_texture_compression_FXT1
		1336	* specification and serves as a concept for the encoder.
		1337	\***************************************************************************/
		1338
		1339
		1340	/* lookup table for scaling 5 bit colors up to 8 bits */
		1341	static const GLubyte _rgb_scale_5[] = {
		1342	0, 8, 16, 25, 33, 41, 49, 58,
		1343	66, 74, 82, 90, 99, 107, 115, 123,
		1344	132, 140, 148, 156, 165, 173, 181, 189,
		1345	197, 206, 214, 222, 230, 239, 247, 255
		1346	};
		1347
		1348	/* lookup table for scaling 6 bit colors up to 8 bits */
		1349	static const GLubyte _rgb_scale_6[] = {
		1350	0, 4, 8, 12, 16, 20, 24, 28,
		1351	32, 36, 40, 45, 49, 53, 57, 61,
		1352	65, 69, 73, 77, 81, 85, 89, 93,
		1353	97, 101, 105, 109, 113, 117, 121, 125,
		1354	130, 134, 138, 142, 146, 150, 154, 158,
		1355	162, 166, 170, 174, 178, 182, 186, 190,
		1356	194, 198, 202, 206, 210, 215, 219, 223,
		1357	227, 231, 235, 239, 243, 247, 251, 255
		1358	};
		1359
		1360
		1361	#define CC_SEL(cc, which) (((GLuint *)(cc))[(which) / 32] >> ((which) & 31))
		1362	#define UP5(c) _rgb_scale_5[(c) & 31]
		1363	#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) \| ((b) & 1)]
		1364	#define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n)
		1365
		1366
		1367	static void
		1368	fxt1_decode_1HI (const GLubyte code, GLint t, GLubyte rgba)
		1369	{
		1370	const GLuint *cc;
		1371
		1372	t *= 3;
		1373	cc = (const GLuint *)(code + t / 8);
		1374	t = (cc[0] >> (t & 7)) & 7;
		1375
		1376	if (t == 7) {
		1377	rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
		1378	} else {
		1379	GLubyte r, g, b;
		1380	cc = (const GLuint *)(code + 12);
		1381	if (t == 0) {
		1382	b = UP5(CC_SEL(cc, 0));
		1383	g = UP5(CC_SEL(cc, 5));
		1384	r = UP5(CC_SEL(cc, 10));
		1385	} else if (t == 6) {
		1386	b = UP5(CC_SEL(cc, 15));
		1387	g = UP5(CC_SEL(cc, 20));
		1388	r = UP5(CC_SEL(cc, 25));
		1389	} else {
		1390	b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
		1391	g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
		1392	r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
		1393	}
		1394	rgba[RCOMP] = r;
		1395	rgba[GCOMP] = g;
		1396	rgba[BCOMP] = b;
		1397	rgba[ACOMP] = 255;
		1398	}
		1399	}
		1400
		1401
		1402	static void
		1403	fxt1_decode_1CHROMA (const GLubyte code, GLint t, GLubyte rgba)
		1404	{
		1405	const GLuint *cc;
		1406	GLuint kk;
		1407
		1408	cc = (const GLuint *)code;
		1409	if (t & 16) {
		1410	cc++;
		1411	t &= 15;
		1412	}
		1413	t = (cc[0] >> (t * 2)) & 3;
		1414
		1415	t *= 15;
		1416	cc = (const GLuint *)(code + 8 + t / 8);
		1417	kk = cc[0] >> (t & 7);
		1418	rgba[BCOMP] = UP5(kk);
		1419	rgba[GCOMP] = UP5(kk >> 5);
		1420	rgba[RCOMP] = UP5(kk >> 10);
		1421	rgba[ACOMP] = 255;
		1422	}
		1423
		1424
		1425	static void
		1426	fxt1_decode_1MIXED (const GLubyte code, GLint t, GLubyte rgba)
		1427	{
		1428	const GLuint *cc;
		1429	GLuint col[2][3];
		1430	GLint glsb, selb;
		1431
		1432	cc = (const GLuint *)code;
		1433	if (t & 16) {
		1434	t &= 15;
		1435	t = (cc[1] >> (t * 2)) & 3;
		1436	/* col 2 */
		1437	col[0][BCOMP] = ((const GLuint )(code + 11)) >> 6;
		1438	col[0][GCOMP] = CC_SEL(cc, 99);
		1439	col[0][RCOMP] = CC_SEL(cc, 104);
		1440	/* col 3 */
		1441	col[1][BCOMP] = CC_SEL(cc, 109);
		1442	col[1][GCOMP] = CC_SEL(cc, 114);
		1443	col[1][RCOMP] = CC_SEL(cc, 119);
		1444	glsb = CC_SEL(cc, 126);
		1445	selb = CC_SEL(cc, 33);
		1446	} else {
		1447	t = (cc[0] >> (t * 2)) & 3;
		1448	/* col 0 */
		1449	col[0][BCOMP] = CC_SEL(cc, 64);
		1450	col[0][GCOMP] = CC_SEL(cc, 69);
		1451	col[0][RCOMP] = CC_SEL(cc, 74);
		1452	/* col 1 */
		1453	col[1][BCOMP] = CC_SEL(cc, 79);
		1454	col[1][GCOMP] = CC_SEL(cc, 84);
		1455	col[1][RCOMP] = CC_SEL(cc, 89);
		1456	glsb = CC_SEL(cc, 125);
		1457	selb = CC_SEL(cc, 1);
		1458	}
		1459
		1460	if (CC_SEL(cc, 124) & 1) {
		1461	/* alpha[0] == 1 */
		1462
		1463	if (t == 3) {
		1464	/* zero */
		1465	rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
		1466	} else {
		1467	GLubyte r, g, b;
		1468	if (t == 0) {
		1469	b = UP5(col[0][BCOMP]);
		1470	g = UP5(col[0][GCOMP]);
		1471	r = UP5(col[0][RCOMP]);
		1472	} else if (t == 2) {
		1473	b = UP5(col[1][BCOMP]);
		1474	g = UP6(col[1][GCOMP], glsb);
		1475	r = UP5(col[1][RCOMP]);
		1476	} else {
		1477	b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
		1478	g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
		1479	r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
		1480	}
		1481	rgba[RCOMP] = r;
		1482	rgba[GCOMP] = g;
		1483	rgba[BCOMP] = b;
		1484	rgba[ACOMP] = 255;
		1485	}
		1486	} else {
		1487	/* alpha[0] == 0 */
		1488	GLubyte r, g, b;
		1489	if (t == 0) {
		1490	b = UP5(col[0][BCOMP]);
		1491	g = UP6(col[0][GCOMP], glsb ^ selb);
		1492	r = UP5(col[0][RCOMP]);
		1493	} else if (t == 3) {
		1494	b = UP5(col[1][BCOMP]);
		1495	g = UP6(col[1][GCOMP], glsb);
		1496	r = UP5(col[1][RCOMP]);
		1497	} else {
		1498	b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
		1499	g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
		1500	UP6(col[1][GCOMP], glsb));
		1501	r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
		1502	}
		1503	rgba[RCOMP] = r;
		1504	rgba[GCOMP] = g;
		1505	rgba[BCOMP] = b;
		1506	rgba[ACOMP] = 255;
		1507	}
		1508	}
		1509
		1510
		1511	static void
		1512	fxt1_decode_1ALPHA (const GLubyte code, GLint t, GLubyte rgba)
		1513	{
		1514	const GLuint *cc;
		1515	GLubyte r, g, b, a;
		1516
		1517	cc = (const GLuint *)code;
		1518	if (CC_SEL(cc, 124) & 1) {
		1519	/* lerp == 1 */
		1520	GLuint col0[4];
		1521
		1522	if (t & 16) {
		1523	t &= 15;
		1524	t = (cc[1] >> (t * 2)) & 3;
		1525	/* col 2 */
		1526	col0[BCOMP] = ((const GLuint )(code + 11)) >> 6;
		1527	col0[GCOMP] = CC_SEL(cc, 99);
		1528	col0[RCOMP] = CC_SEL(cc, 104);
		1529	col0[ACOMP] = CC_SEL(cc, 119);
		1530	} else {
		1531	t = (cc[0] >> (t * 2)) & 3;
		1532	/* col 0 */
		1533	col0[BCOMP] = CC_SEL(cc, 64);
		1534	col0[GCOMP] = CC_SEL(cc, 69);
		1535	col0[RCOMP] = CC_SEL(cc, 74);
		1536	col0[ACOMP] = CC_SEL(cc, 109);
		1537	}
		1538
		1539	if (t == 0) {
		1540	b = UP5(col0[BCOMP]);
		1541	g = UP5(col0[GCOMP]);
		1542	r = UP5(col0[RCOMP]);
		1543	a = UP5(col0[ACOMP]);
		1544	} else if (t == 3) {
		1545	b = UP5(CC_SEL(cc, 79));
		1546	g = UP5(CC_SEL(cc, 84));
		1547	r = UP5(CC_SEL(cc, 89));
		1548	a = UP5(CC_SEL(cc, 114));
		1549	} else {
		1550	b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
		1551	g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
		1552	r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
		1553	a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
		1554	}
		1555	} else {
		1556	/* lerp == 0 */
		1557
		1558	if (t & 16) {
		1559	cc++;
		1560	t &= 15;
		1561	}
		1562	t = (cc[0] >> (t * 2)) & 3;
		1563
		1564	if (t == 3) {
		1565	/* zero */
		1566	r = g = b = a = 0;
		1567	} else {
		1568	GLuint kk;
		1569	cc = (const GLuint *)code;
		1570	a = UP5(cc[3] >> (t * 5 + 13));
		1571	t *= 15;
		1572	cc = (const GLuint *)(code + 8 + t / 8);
		1573	kk = cc[0] >> (t & 7);
		1574	b = UP5(kk);
		1575	g = UP5(kk >> 5);
		1576	r = UP5(kk >> 10);
		1577	}
		1578	}
		1579	rgba[RCOMP] = r;
		1580	rgba[GCOMP] = g;
		1581	rgba[BCOMP] = b;
		1582	rgba[ACOMP] = a;
		1583	}
		1584
		1585
		1586	static void
		1587	fxt1_decode_1 (const void texture, GLint stride, / in pixels */
		1588	GLint i, GLint j, GLubyte *rgba)
		1589	{
		1590	static void (decode_1[]) (const GLubyte , GLint, GLubyte *) = {
		1591	fxt1_decode_1HI, /* cc-high = "00?" */
		1592	fxt1_decode_1HI, /* cc-high = "00?" */
		1593	fxt1_decode_1CHROMA, /* cc-chroma = "010" */
		1594	fxt1_decode_1ALPHA, /* alpha = "011" */
		1595	fxt1_decode_1MIXED, /* mixed = "1??" */
		1596	fxt1_decode_1MIXED, /* mixed = "1??" */
		1597	fxt1_decode_1MIXED, /* mixed = "1??" */
		1598	fxt1_decode_1MIXED /* mixed = "1??" */
		1599	};
		1600
		1601	const GLubyte code = (const GLubyte )texture +
		1602	((j / 4) * (stride / 8) + (i / 8)) * 16;
		1603	GLint mode = CC_SEL(code, 125);
		1604	GLint t = i & 7;
		1605
		1606	if (t & 4) {
		1607	t += 12;
		1608	}
		1609	t += (j & 3) * 4;
		1610
		1611	decode_1[mode](code, t, rgba);
		1612	}
		1613
		1614
		1615
		1616
		1617	static void
		1618	fetch_rgb_fxt1(const GLubyte *map,
		1619	GLint rowStride, GLint i, GLint j, GLfloat *texel)
		1620	{
		1621	GLubyte rgba[4];
		1622	fxt1_decode_1(map, rowStride, i, j, rgba);
		1623	texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
		1624	texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
		1625	texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
		1626	texel[ACOMP] = 1.0F;
		1627	}
		1628
		1629
		1630	static void
		1631	fetch_rgba_fxt1(const GLubyte *map,
		1632	GLint rowStride, GLint i, GLint j, GLfloat *texel)
		1633	{
		1634	GLubyte rgba[4];
		1635	fxt1_decode_1(map, rowStride, i, j, rgba);
		1636	texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
		1637	texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
		1638	texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
		1639	texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
		1640	}
		1641
		1642
		1643	compressed_fetch_func
		1644	_mesa_get_fxt_fetch_func(gl_format format)
		1645	{
		1646	switch (format) {
		1647	case MESA_FORMAT_RGB_FXT1:
		1648	return fetch_rgb_fxt1;
		1649	case MESA_FORMAT_RGBA_FXT1:
		1650	return fetch_rgba_fxt1;
		1651	default:
		1652	return NULL;
		1653	}
		1654	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/Mesa/src/mesa/main/texcompress_fxt1.c – Rev 4358