This repository was archived by the owner on Oct 31, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathYCoCgDXT.cpp
769 lines (612 loc) · 29.4 KB
/
YCoCgDXT.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
/*
YCoCgDXT.c
Hap Codec
Copyright (c) 2012-2013, Tom Butterworth and Vidvox LLC. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Based on code by J.M.P. van Waveren / id Software, Inc.
and changes by Chris Sidhall / Electronic Arts
My changes are trivial:
- Remove dependencies on other EAWebKit files
- Mark unexported functions as static
- Refactor to eliminate use of a global variable
- Correct spelling of NVIDIA_7X_HARDWARE_BUG_FIX macro
- Remove single usage of an assert macro
Copyright (C) 2009-2011 Electronic Arts, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of Electronic Arts, Inc. ("EA") nor the names of
its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY ELECTRONIC ARTS AND ITS CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ELECTRONIC ARTS OR ITS CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
///////////////////////////////////////////////////////////////////////////////
// BCImageCompressionEA.cpp
// File created by Chris Sidhall
// Also please see Copyright (C) 2007 Id Software, Inc used in this file.
///////////////////////////////////////////////////////////////////////////////
#include "YCoCgDXT.h"
#include <string.h>
#include <stdlib.h>
/* ALWAYS_INLINE */
/* Derived from EAWebKit's AlwaysInline.h, losing some of its support for other compilers */
/* Selects the inlining qualifier used by the small helpers in this file, unless the build
   already supplies its own definition:
   - GCC / Clang with DEBUG undefined: inline plus the always_inline attribute
   - MSVC with NDEBUG defined:         __forceinline
   - anything else:                    plain inline
   Note the two "release build" tests differ: the first keys on DEBUG being absent, the
   second on NDEBUG being present. */
#ifndef ALWAYS_INLINE
#if (defined(__GNUC__) || defined(__clang__)) && !defined(DEBUG)
#define ALWAYS_INLINE inline __attribute__((__always_inline__))
#elif defined(_MSC_VER) && defined(NDEBUG)
#define ALWAYS_INLINE __forceinline
#else
#define ALWAYS_INLINE inline
#endif
#endif
// CSidhall Note: The compression code is taken directly from http://developer.nvidia.com/object/real-time-ycocg-dxt-compression.html
// It was missing some Emit functions, but I have tried to keep it as close as possible to the original version.
// Also removed some alpha handling which was never used, and added a few overloaded functions (like ExtractBlock).
/*
Real-Time YCoCg DXT Compression
Copyright (C) 2007 Id Software, Inc.
Written by J.M.P. van Waveren
This code is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This code is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
*/
/*
* This code was modified by Electronic Arts Inc Copyright (C) 2009
*/
// Short integer aliases used by the packing code (skipped if a macro of the same name exists).
#ifndef word
typedef unsigned short word;
#endif
#ifndef dword
typedef unsigned int dword;
#endif
#define INSET_COLOR_SHIFT 4 // inset color bounding box
#define INSET_ALPHA_SHIFT 5 // inset alpha bounding box
#define C565_5_MASK 0xF8 // 0xFF minus last three bits
#define C565_6_MASK 0xFC // 0xFF minus last two bits
#define NVIDIA_G7X_HARDWARE_BUG_FIX // keep the colors sorted as: max, min
// Byte-order detection. __LITTLE_ENDIAN__ is not predefined by every compiler on
// little-endian targets, so the __BYTE_ORDER__ / __ORDER_LITTLE_ENDIAN__ pair is checked
// as well. Without it such builds would be treated as big-endian and the decoder would
// byte-swap data that is already in the right order.
#if defined(__LITTLE_ENDIAN__) || defined(_WIN32) || \
    ( defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && ( __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ) )
#define EA_SYSTEM_LITTLE_ENDIAN
#endif
// Packs the first three bytes of `color` into one 5:6:5 word: the top five bits of
// color[0] land in bits 11-15, the top six bits of color[1] in bits 5-10 and the top
// five bits of color[2] in bits 0-4.
static ALWAYS_INLINE word ColorTo565( const byte *color ) {
    const int high5 = color[ 0 ] >> 3;
    const int mid6 = color[ 1 ] >> 2;
    const int low5 = color[ 2 ] >> 3;
    return ( high5 << 11 ) | ( mid6 << 5 ) | low5;
}
// Writes one byte at the output cursor and advances the cursor past it.
static ALWAYS_INLINE void EmitByte( byte b, byte **outData ) {
    byte *cursor = *outData;
    *cursor = b;
    *outData = cursor + 1;
}
// Writes the low 32 bits of `s` at the output cursor, least significant byte first,
// and advances the cursor by four bytes.
static ALWAYS_INLINE void EmitUInt( unsigned int s, byte **outData ){
    byte *cursor = *outData;
    for ( int k = 0; k < 4; k++ ) {
        cursor[k] = ( s >> ( 8 * k ) ) & 255;
    }
    *outData = cursor + 4;
}
// Writes `s` at the output cursor as two bytes, low byte first, and advances the
// cursor by two bytes.
static ALWAYS_INLINE void EmitUShort( unsigned short s, byte **outData ){
    byte *cursor = *outData;
    const int lowByte = s & 255;
    const int highByte = ( s >> 8 ) & 255;
    cursor[0] = lowByte;
    cursor[1] = highByte;
    *outData = cursor + 2;
}
// Copies a full 4x4 texel block (four bytes per texel) out of the source image into the
// 64-byte linear work buffer `colorBlock`. `stride` is the distance in bytes between
// consecutive source rows. The caller must guarantee all four rows and columns exist.
static ALWAYS_INLINE void ExtractBlock( const byte *inPtr, const int stride, byte *colorBlock ) {
    const int rowBytes = 4 * 4; // four texels of four bytes each
    const byte *srcRow = inPtr;
    int dstOffset = 0;
    int row = 0;
    while ( row < 4 ) {
        memcpy( colorBlock + dstOffset, srcRow, rowBytes );
        srcRow += stride;
        dstOffset += rowBytes;
        row++;
    }
}
// This box extract replicates the last rows and columns if the row or columns are not 4 texels aligned
// This is so we don't get random pixels which could affect the color interpolation
static void ExtractBlock( const byte *inPtr, const int stride, const int widthRemain, const int heightRemain, byte *colorBlock ) {
int *pBlock32 = (int *) colorBlock; // Since we are using ARGA, we assume 4 byte alignment is already being used
int *pSource32 = (int*) inPtr;
int hIndex=0;
for(int j =0; j < 4; j++) {
int wIndex = 0;
for(int i=0; i < 4; i++) {
pBlock32[i] = pSource32[wIndex];
// Set up offset for next column source (keep existing if we are at the end)
if(wIndex < (widthRemain - 1)) {
wIndex++;
}
}
// Set up offset for next texel row source (keep existing if we are at the end)
pBlock32 +=4;
if(hIndex < (heightRemain-1)) {
pSource32 +=(stride >> 2);
hIndex++;
}
}
}
// Computes the per-channel bounding box of a 16-texel block for channels 0, 1 and 3.
// Channel 2 is deliberately not scanned: its entries keep their initial values
// (min 255, max 0).
static void GetMinMaxYCoCg( byte *colorBlock, byte *minColor, byte *maxColor ) {
    static const int scannedChannels[3] = { 0, 1, 3 };
    for ( int c = 0; c < 4; c++ ) {
        minColor[c] = 255;
        maxColor[c] = 0;
    }
    for ( int texel = 0; texel < 16; texel++ ) {
        const byte *px = colorBlock + texel * 4;
        for ( int k = 0; k < 3; k++ ) {
            const int c = scannedChannels[k];
            const byte value = px[c];
            if ( value < minColor[c] ) {
                minColor[c] = value;
            }
            if ( value > maxColor[c] ) {
                maxColor[c] = value;
            }
        }
    }
}
// EA/Alex Mole: the library abs is not inlined and is called very often in this file,
// so a local always-inline absolute value is used instead.
static ALWAYS_INLINE int absEA( int liArg )
{
    if ( liArg < 0 ) {
        return -liArg;
    }
    return liArg;
}
// Expands channels 0 and 1 around the grey point 128 by a factor of 1, 2 or 4, chosen from
// the largest distance of the bounding box from 128: at most 31 gives 4, at most 63 gives 2,
// anything larger gives 1. The factor is applied to the bounding box and to all 16 texels,
// and (scale - 1) << 3 is stored in slot 2 of both minColor and maxColor.
static void ScaleYCoCg( byte *colorBlock, byte *minColor, byte *maxColor ) {
    // Largest absolute offset from 128 over both bounds of both channels.
    int extent = 0;
    for ( int c = 0; c < 2; c++ ) {
        const int lowOffset = absEA( minColor[c] - 128 );
        const int highOffset = absEA( maxColor[c] - 128 );
        if ( lowOffset > extent ) {
            extent = lowOffset;
        }
        if ( highOffset > extent ) {
            extent = highOffset;
        }
    }
    const int limitForTwo = 128 / 2 - 1;
    const int limitForFour = 128 / 4 - 1;
    int scale = 1;
    if ( extent <= limitForTwo ) {
        scale = 2;
    }
    if ( extent <= limitForFour ) {
        scale = 4;
    }
    const int scaleTag = ( scale - 1 ) << 3;
    for ( int c = 0; c < 2; c++ ) {
        minColor[c] = ( minColor[c] - 128 ) * scale + 128;
        maxColor[c] = ( maxColor[c] - 128 ) * scale + 128;
    }
    minColor[2] = scaleTag;
    maxColor[2] = scaleTag;
    for ( int texel = 0; texel < 16; texel++ ) {
        byte *px = colorBlock + texel * 4;
        px[0] = ( px[0] - 128 ) * scale + 128;
        px[1] = ( px[1] - 128 ) * scale + 128;
    }
}
// Pulls the bounding box of channels 0, 1 and 3 inwards by a fraction of its extent
// (1/16 for channels 0 and 1, 1/32 for channel 3, with a small rounding bias), clamps the
// lower bound at 0 and the upper bound at 255, then rounds channels 0 and 1 to the values
// a 5-bit / 6-bit quantisation reproduces. Slot 2 is left untouched.
static void InsetYCoCgBBox( byte *minColor, byte *maxColor ) {
    static const int channelOf[3] = { 0, 1, 3 };
    static const int shiftOf[3] = { INSET_COLOR_SHIFT, INSET_COLOR_SHIFT, INSET_ALPHA_SHIFT };
    int low[3];
    int high[3];
    for ( int k = 0; k < 3; k++ ) {
        const int c = channelOf[k];
        const int shift = shiftOf[k];
        const int bias = ( 1 << ( shift - 1 ) ) - 1;
        const int inset = ( maxColor[c] - minColor[c] ) - bias;
        const int insetLow = ( ( minColor[c] << shift ) + inset ) >> shift;
        const int insetHigh = ( ( maxColor[c] << shift ) - inset ) >> shift;
        low[k] = ( insetLow < 0 ) ? 0 : insetLow;
        high[k] = ( insetHigh > 255 ) ? 255 : insetHigh;
    }
    // Replicate the top bits into the bits that 5:6:5 packing discards.
    minColor[0] = ( low[0] & C565_5_MASK ) | ( low[0] >> 5 );
    minColor[1] = ( low[1] & C565_6_MASK ) | ( low[1] >> 6 );
    minColor[3] = low[2];
    maxColor[0] = ( high[0] & C565_5_MASK ) | ( high[0] >> 5 );
    maxColor[1] = ( high[1] & C565_6_MASK ) | ( high[1] >> 6 );
    maxColor[3] = high[2];
}
static void SelectYCoCgDiagonal( const byte *colorBlock, byte *minColor, byte *maxColor ) {
byte mid0 = ( (int) minColor[0] + maxColor[0] + 1 ) >> 1;
byte mid1 = ( (int) minColor[1] + maxColor[1] + 1 ) >> 1;
byte side = 0;
for ( int i = 0; i < 16; i++ ) {
byte b0 = colorBlock[i*4+0] >= mid0;
byte b1 = colorBlock[i*4+1] >= mid1;
side += ( b0 ^ b1 );
}
byte mask = -( side > 8 );
#ifdef NVIDIA_G7X_HARDWARE_BUG_FIX
mask &= -( minColor[0] != maxColor[0] );
#endif
byte c0 = minColor[1];
byte c1 = maxColor[1];
// PlayStation 3 compiler warning fix:
// c0 ^= c1 ^= mask &= c0 ^= c1; // Orignial code
byte c2 = c0 ^ c1;
c0 = c2;
c0 ^= c1 ^= mask &=c2;
minColor[1] = c0;
maxColor[1] = c1;
}
static void EmitAlphaIndices( const byte *colorBlock, const byte minAlpha, const byte maxAlpha, byte **outData ) {
const int ALPHA_RANGE = 7;
byte mid, ab1, ab2, ab3, ab4, ab5, ab6, ab7;
byte indexes[16];
mid = ( maxAlpha - minAlpha ) / ( 2 * ALPHA_RANGE );
ab1 = minAlpha + mid;
ab2 = ( 6 * maxAlpha + 1 * minAlpha ) / ALPHA_RANGE + mid;
ab3 = ( 5 * maxAlpha + 2 * minAlpha ) / ALPHA_RANGE + mid;
ab4 = ( 4 * maxAlpha + 3 * minAlpha ) / ALPHA_RANGE + mid;
ab5 = ( 3 * maxAlpha + 4 * minAlpha ) / ALPHA_RANGE + mid;
ab6 = ( 2 * maxAlpha + 5 * minAlpha ) / ALPHA_RANGE + mid;
ab7 = ( 1 * maxAlpha + 6 * minAlpha ) / ALPHA_RANGE + mid;
for ( int i = 0; i < 16; i++ ) {
byte a = colorBlock[i*4+3]; // Here it seems to be using the Y (luna) for the alpha
int b1 = ( a <= ab1 );
int b2 = ( a <= ab2 );
int b3 = ( a <= ab3 );
int b4 = ( a <= ab4 );
int b5 = ( a <= ab5 );
int b6 = ( a <= ab6 );
int b7 = ( a <= ab7 );
int index = ( b1 + b2 + b3 + b4 + b5 + b6 + b7 + 1 ) & 7;
indexes[i] = index ^ ( 2 > index );
}
EmitByte( (indexes[ 0] >> 0) | (indexes[ 1] << 3) | (indexes[ 2] << 6), outData );
EmitByte( (indexes[ 2] >> 2) | (indexes[ 3] << 1) | (indexes[ 4] << 4) | (indexes[ 5] << 7), outData );
EmitByte( (indexes[ 5] >> 1) | (indexes[ 6] << 2) | (indexes[ 7] << 5), outData );
EmitByte( (indexes[ 8] >> 0) | (indexes[ 9] << 3) | (indexes[10] << 6), outData );
EmitByte( (indexes[10] >> 2) | (indexes[11] << 1) | (indexes[12] << 4) | (indexes[13] << 7), outData );
EmitByte( (indexes[13] >> 1) | (indexes[14] << 2) | (indexes[15] << 5), outData );
}
// Emits the four index bytes of the second half of a block: a 2-bit index per texel
// selecting the nearest of four palette entries built from maxColor (entry 0), minColor
// (entry 1) and the two points one third and two thirds of the way between them (entries
// 2 and 3). Distance is the sum of absolute differences over channels 0 and 1 only, so
// only those two channels of the palette are built.
//
// The 32-bit result is assembled with unsigned shifts; texel 15's index is shifted by 30
// bits, which would overflow a signed int.
static void EmitColorIndices( const byte *colorBlock, const byte *minColor, const byte *maxColor, byte **outData ) {
    int palette[4][2];
    // Endpoints as they will look after 5-bit / 6-bit quantisation and bit replication.
    palette[0][0] = ( maxColor[0] & C565_5_MASK ) | ( maxColor[0] >> 5 );
    palette[0][1] = ( maxColor[1] & C565_6_MASK ) | ( maxColor[1] >> 6 );
    palette[1][0] = ( minColor[0] & C565_5_MASK ) | ( minColor[0] >> 5 );
    palette[1][1] = ( minColor[1] & C565_6_MASK ) | ( minColor[1] >> 6 );
    for ( int c = 0; c < 2; c++ ) {
        palette[2][c] = ( 2 * palette[0][c] + 1 * palette[1][c] ) / 3;
        palette[3][c] = ( 1 * palette[0][c] + 2 * palette[1][c] ) / 3;
    }
    unsigned int result = 0;
    for ( int i = 15; i >= 0; i-- ) {
        const int c0 = colorBlock[i*4+0];
        const int c1 = colorBlock[i*4+1];
        const int d0 = absEA( palette[0][0] - c0 ) + absEA( palette[0][1] - c1 );
        const int d1 = absEA( palette[1][0] - c0 ) + absEA( palette[1][1] - c1 );
        const int d2 = absEA( palette[2][0] - c0 ) + absEA( palette[2][1] - c1 );
        const int d3 = absEA( palette[3][0] - c0 ) + absEA( palette[3][1] - c1 );
        // Branch-free selection of the nearest entry from pairwise distance comparisons.
        const int b0 = d0 > d3;
        const int b1 = d1 > d2;
        const int b2 = d0 > d2;
        const int b3 = d1 > d3;
        const int b4 = d2 > d3;
        const int x0 = b1 & b2;
        const int x1 = b0 & b3;
        const int x2 = b0 & b4;
        const unsigned int index = (unsigned int)( x2 | ( ( x0 | x1 ) << 1 ) );
        result |= index << ( i << 1 );
    }
    EmitUInt( result, outData );
}
/*F*************************************************************************************************/
/*!
\Function CompressYCoCgDXT5( const byte *inBuf, byte *outBuf, const int width, const int height, const int stride )
\Description This is the C version of the YcoCgDXT5.
Input data needs to be converted from ARGB to YCoCg before calling this function.
Does not support alpha at all since it uses the alpha channel to store the Y (luma).
The output size is 4:1 but will be based on rounded up texture sizes on 4 texel boundaries
So for example if the source texture is 33 x 32, the compressed size will be 36x32.
The DXT5 compresses groups of 4x4 texels into 16 bytes (4:1 saving)
The compressed format:
2 bytes of min and max Y luma values (these are used to rebuild an 8 element Luma table)
6 bytes of indexes into the luma table
3 bits per index so 16 indexes total
2 shorts of min and max color values (these are used to rebuild a 4 element chroma table)
5 bits Co
6 bits Cg
5 bits Scale. The scale can only be 1, 2 or 4.
4 bytes of indexes into the Chroma CocG table
2 bits per index so 16 indexes total
\Input const byte *inBuf Input buffer of the YCoCG textel data
\Input const byte *outBuf Output buffer for the compressed data
\Input int width in source width
\Input int height in source height
\Input int stride in source in buffer stride in bytes
\Output int ouput size
\Version 1.1 CSidhall 01/12/09 modified to account for non aligned textures
1.2 1/10/10 Added stride
*/
/*************************************************************************************************F*/
extern "C" int CompressYCoCgDXT5( const byte *inBuf, byte *outBuf, const int width, const int height , const int stride) {
int outputBytes =0;
byte block[64];
byte minColor[4];
byte maxColor[4];
byte *outData = outBuf;
int blockLineSize = stride * 4; // 4 lines per loop
for ( int j = 0; j < height; j += 4, inBuf +=blockLineSize ) {
int heightRemain = height - j;
for ( int i = 0; i < width; i += 4 ) {
// Note: Modified from orignal source so that it can handle the edge blending better with non aligned 4x textures
int widthRemain = width - i;
if ((heightRemain < 4) || (widthRemain < 4) ) {
ExtractBlock( inBuf + i * 4, stride, widthRemain, heightRemain, block );
}
else {
ExtractBlock( inBuf + i * 4, stride, block );
}
// A simple min max extract for each color channel including alpha
GetMinMaxYCoCg( block, minColor, maxColor );
ScaleYCoCg( block, minColor, maxColor ); // Sets the scale in the min[2] and max[2] offset
InsetYCoCgBBox( minColor, maxColor );
SelectYCoCgDiagonal( block, minColor, maxColor );
EmitByte( maxColor[3], &outData ); // Note: the luma is stored in the alpha channel
EmitByte( minColor[3], &outData );
EmitAlphaIndices( block, minColor[3], maxColor[3], &outData );
EmitUShort( ColorTo565( maxColor ), &outData );
EmitUShort( ColorTo565( minColor ), &outData );
EmitColorIndices( block, minColor, maxColor, &outData );
}
}
outputBytes = (int)(outData - outBuf);
return outputBytes;
}
//--- YCoCgDXT5 Decompression ---
static void RestoreLumaAlphaBlock( const void * pSource, byte * colorBlock){
byte *pS=(unsigned char *) pSource;
byte luma[8];
// Grabbed this standard table building from undxt.cpp UnInterpolatedAlphaBlock()
luma[0] = *pS++;
luma[1] = *pS++;
luma[2] = (byte)((6 * luma[0] + 1 * luma[1] + 3) / 7);
luma[3] = (byte)((5 * luma[0] + 2 * luma[1] + 3) / 7);
luma[4] = (byte)((4 * luma[0] + 3 * luma[1] + 3) / 7);
luma[5] = (byte)((3 * luma[0] + 4 * luma[1] + 3) / 7);
luma[6] = (byte)((2 * luma[0] + 5 * luma[1] + 3) / 7);
luma[7] = (byte)((1 * luma[0] + 6 * luma[1] + 3) / 7);
int rawIndexes;
int raw;
int colorIndex=3;
// We have 6 bytes of indexes (3 bits * 16 texels)
// Easier to process in 2 groups of 8 texels...
for(int j=0; j < 2; j++) {
// Pack the indexes so we can shift out the indexes as a group
rawIndexes = *pS++;
raw = *pS++;
rawIndexes |= raw << 8;
raw = *pS++;
rawIndexes |= raw << 16;
// Since we still have to operate on the texels, just store it in a linear array workspace
for(int i=0; i < 8; i++) {
static const int LUMA_INDEX_FILTER = 0x7; // To isolate the 3 bit luma index
byte index = (byte)(rawIndexes & LUMA_INDEX_FILTER);
colorBlock[colorIndex] = luma[index];
colorIndex += 4;
rawIndexes >>=3;
}
}
}
// Unpacks a 5:6:5 word into three bytes. Each field is shifted back up to the top of its
// byte (low bits are left at zero): bits 11-15 go to pOutColor[0], bits 5-10 to
// pOutColor[1] and bits 0-4 to pOutColor[2].
static ALWAYS_INLINE void Convert565ToColor( const unsigned short value , byte *pOutColor )
{
    const int field5High = value >> ( 5 + 6 );
    const int field6 = ( value >> 5 ) & 0x3f;
    const int field5Low = value & 0x1f;
    pOutColor[0] = field5High << 3;
    pOutColor[1] = field6 << 2;
    pOutColor[2] = field5Low << 3;
}
#ifndef EA_SYSTEM_LITTLE_ENDIAN
// Swaps the two bytes of a 16-bit value.
// The swap is done on the unsigned bit pattern: callers pass values with the top bit set,
// which arrive here as negative shorts, and left-shifting a negative signed value is not
// well defined.
static ALWAYS_INLINE short ShortFlipBytes( short raw )
{
    const unsigned int bits = (unsigned short) raw;
    const unsigned int swapped = ( ( bits >> 8 ) & 0xffu ) | ( ( bits << 8 ) & 0xff00u );
    return (short) swapped;
}
#endif
static void RestoreChromaBlock( const void * pSource, byte *colorBlock)
{
unsigned short *pS =(unsigned short *) pSource;
pS +=4; // Color info stars after 8 bytes (first 8 is the Y/alpha channel info)
unsigned short rawColor = *pS++;
#ifndef EA_SYSTEM_LITTLE_ENDIAN
rawColor = ShortFlipBytes(rawColor);
#endif
byte color[4][4]; // Color workspace
// Build the color lookup table
// The luma should have already been extracted and sitting at offset[3]
Convert565ToColor( rawColor , &color[0][0] );
rawColor = *pS++;
#ifndef EA_SYSTEM_LITTLE_ENDIAN
rawColor = ShortFlipBytes(rawColor);
#endif
Convert565ToColor( rawColor , &color[1][0] );
// EA/Alex Mole: mixing float & int operations is horrifyingly slow on some platforms, so we do it different!
#if defined(__PPU__) || defined(_XBOX)
color[2][0] = (byte) ( ( ((int)color[0][0] * 3) + ((int)color[1][0] ) ) >> 2 );
color[2][1] = (byte) ( ( ((int)color[0][1] * 3) + ((int)color[1][1] ) ) >> 2 );
color[3][0] = (byte) ( ( ((int)color[0][0] ) + ((int)color[1][0] * 3) ) >> 2 );
color[3][1] = (byte) ( ( ((int)color[0][1] ) + ((int)color[1][1] * 3) ) >> 2 );
#else
color[2][0] = (byte) ( (color[0][0] * 0.75f) + (color[1][0] * 0.25f) );
color[2][1] = (byte) ( (color[0][1] * 0.75f) + (color[1][1] * 0.25f) );
color[3][0] = (byte) ( (color[0][0] * 0.25f) + (color[1][0] * 0.75f) );
color[3][1] = (byte) ( (color[0][1] * 0.25f) + (color[1][1] * 0.75f) );
#endif
byte scale = ((color[0][2] >> 3) + 1) >> 1; // Adjust for shifts instead of divide
// Scale back values here so we don't have to do it for all 16 texels
// Note: This is really only for the software version. In hardware, the scale would need to be restored during the YCoCg to RGB conversion.
for(int i=0; i < 4; i++) {
color[i][0] = ((color[i][0] - 128) >> scale) + 128;
color[i][1] = ((color[i][1] - 128) >> scale) + 128;
}
// Rebuild the color block using the indexes (2 bits per texel)
int rawIndexes;
int colorIndex=0;
// We have 2 shorts of indexes (2 bits * 16 texels = 32 bits). (If can confirm 4x alignment, can grab it as a word with single loop)
for(int j=0; j < 2; j++) {
rawIndexes = *pS++;
#ifndef EA_SYSTEM_LITTLE_ENDIAN
rawIndexes = ShortFlipBytes(rawIndexes);
#endif
// Since we still have to operate on block, just store it in a linear array workspace
for(int i=0; i < 8; i++) {
static const int COCG_INDEX_FILTER = 0x3; // To isolate the 2 bit chroma index
unsigned char index = (unsigned char)(rawIndexes & COCG_INDEX_FILTER);
colorBlock[colorIndex] = color[index][0];
colorBlock[colorIndex+1] = color[index][1];
colorBlock[colorIndex+2] = 255;
colorIndex += 4;
rawIndexes >>=2;
}
}
}
// Writes a complete 4x4 texel block from the linear work buffer into the output image and
// returns the number of bytes written (always 64). All four rows of four texels are
// written unconditionally, so the caller must make sure the block lies fully inside the
// output rectangle. `stride` is the distance in bytes between output rows.
static int ALWAYS_INLINE StoreBlock( const byte *colorBlock, const int stride, byte *outPtr ) {
    const int rowBytes = 4 * 4; // four texels of four bytes each
    byte *dstRow = outPtr;
    int srcOffset = 0;
    int row = 0;
    while ( row < 4 ) {
        memcpy( (void*) dstRow, colorBlock + srcOffset, rowBytes );
        dstRow += stride;
        srcOffset += rowBytes;
        row++;
    }
    return 64;
}
// Edge-aware variant of StoreBlock: writes only the texels of the 4x4 work block that lie
// inside the output rectangle (at most widthRemain columns and heightRemain rows, each
// capped at 4) and returns the number of bytes written. Nothing is written, and 0 is
// returned, when either remaining count is zero or negative.
//
// Rows are copied with memcpy rather than through int pointers, so the output needs no
// particular alignment, and rows are advanced by the full byte `stride`, matching the
// three-argument StoreBlock even when the stride is not a multiple of four.
static int StoreBlock( const byte *colorBlock , const int stride, const int widthRemain, const int heightRemain, byte *outPtr)
{
    const int texelBytes = 4;
    const int cols = ( widthRemain < 4 ) ? widthRemain : 4;
    const int rows = ( heightRemain < 4 ) ? heightRemain : 4;
    if ( ( cols <= 0 ) || ( rows <= 0 ) ) {
        return 0;
    }
    const int copyBytes = cols * texelBytes;
    for ( int row = 0; row < rows; row++ ) {
        // Work-buffer rows are always four texels wide, whatever is copied out.
        memcpy( outPtr + row * stride, colorBlock + row * 4 * texelBytes, copyBytes );
    }
    return rows * copyBytes;
}
/*F*************************************************************************************************/
/*!
\Function DeCompressYCoCgDXT5( const byte *inBuf, byte *outBuf, const int width, const int height, const int stride )
\Description Decompression for YCoCgDXT5
Bascially does the reverse order of he compression.
Ouptut data still needs to be converted from YCoCg to ARGB after this function has completed
(probably more efficient to convert it inside here but have not done so to stay closer to the orginal
sample code and just make it easier to follow).
16 bytes get unpacked into a 4x4 texel block (64 bytes output).
The compressed format:
2 bytes of min and max Y luma values (these are used to rebuild an 8 element Luma table)
6 bytes of indexes into the luma table
3 bits per index so 16 indexes total
2 shorts of min and max color values (these are used to rebuild a 4 element chroma table)
5 bits Co
6 bits Cg
5 bits Scale. The scale can only be 1, 2 or 4.
4 bytes of indexes into the Chroma CocG table
2 bits per index so 16 indexes total
\Input const byte *inBuf
\Input byte *outBuf,
\Input const int width
\input const int height
\input const int stride for inBuf
\Output int size output in bytes
\Version 1.0 01/12/09 Created
1.1 12/21/09 Alex Mole: removed branches from tight inner loop
1.2 11/10/10 CSidhall: Added stride for textures with different image and canvas sizes.
*/
/*************************************************************************************************F*/
extern "C" int DeCompressYCoCgDXT5( const byte *inBuf, byte *outBuf, const int width, const int height, const int stride )
{
byte colorBlock[64]; // 4x4 texel work space a linear array
int outByteCount =0;
const byte *pCurInBuffer = inBuf;
int blockLineSize = stride * 4; // 4 lines per loop
for( int j = 0; j < ( height & ~3 ); j += 4, outBuf += blockLineSize )
{
int i;
for( i = 0; i < ( width & ~3 ); i += 4 )
{
RestoreLumaAlphaBlock(pCurInBuffer, colorBlock);
RestoreChromaBlock(pCurInBuffer, colorBlock);
outByteCount += StoreBlock(colorBlock, stride, outBuf + i * 4);
pCurInBuffer += 16; // 16 bytes per block of compressed data
}
// Do we have some leftover columns?
if( width & 3 )
{
int widthRemain = width & 3;
RestoreLumaAlphaBlock(pCurInBuffer, colorBlock);
RestoreChromaBlock(pCurInBuffer, colorBlock);
outByteCount += StoreBlock(colorBlock , stride, widthRemain, 4 /* heightRemain >= 4 */, outBuf + i * 4);
pCurInBuffer += 16; // 16 bytes per block of compressed data
}
}
// Do we have some leftover lines?
if( height & 3 )
{
int heightRemain = height & 3;
for( int i = 0; i < width; i += 4 )
{
RestoreLumaAlphaBlock(pCurInBuffer, colorBlock);
RestoreChromaBlock(pCurInBuffer, colorBlock);
int widthRemain = width - i;
outByteCount += StoreBlock(colorBlock , stride, widthRemain, heightRemain, outBuf + i * 4);
pCurInBuffer += 16; // 16 bytes per block of compressed data
}
}
return outByteCount;
}