Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5361 | serge | 1 | /* |
2 | * Intra predict 4 Intra_4x4 luma blocks |
||
3 | * Copyright © <2010>, Intel Corporation. |
||
4 | * |
||
5 | * This program is licensed under the terms and conditions of the |
||
6 | * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at |
||
7 | * http://www.opensource.org/licenses/eclipse-1.0.php. |
||
8 | * |
||
9 | */ |
||
10 | #if !defined(__INTRA_PRED_4X4_Y_4__) // Make sure this is only included once |
||
11 | #define __INTRA_PRED_4X4_Y_4__ |
||
12 | |||
13 | // Module name: intra_Pred_4x4_Y_4.asm |
||
14 | // |
||
15 | // Intra predict 4 Intra_4x4 luma blocks |
||
16 | // |
||
17 | //-------------------------------------------------------------------------- |
||
18 | // Input data: |
||
19 | // |
||
20 | // REF_TOP: Top reference data stored in BYTE with p[-1,-1] at REF_TOP(0,-1) |
||
21 | // REF_LEFT: Left reference data stored in BYTE with p[-1,0] at REF_LEFT(0,0) |
||
22 | // PRED_MODE: Intra prediction mode stored in 4 words (4 LSB) |
||
23 | // REG_INTRA_PRED_AVAIL: Top/Left available flag, (Bit0: Left, Bit1: Top) |
||
24 | // |
||
25 | //-------------------------------------------------------------------------- |
||
26 | |||
27 | #undef INTRA_PRED_AVAIL |
||
28 | #undef INTRA_REF |
||
29 | #undef REF_LEFT_BACK |
||
30 | #undef REF_TMP |
||
31 | #undef REF_TMP1 |
||
32 | |||
33 | #define INTRA_PRED_AVAIL REG_INTRA_TEMP_2.8 |
||
34 | #define INTRA_REF REG_INTRA_TEMP_2 |
||
35 | #define REF_LEFT_BACK REG_INTRA_TEMP_8 |
||
36 | #define REF_TMP REG_INTRA_TEMP_3 |
||
37 | #define REF_TMP1 REG_INTRA_TEMP_4 |
||
38 | |||
39 | intra_Pred_4x4_Y_4: |
||
40 | |||
41 | mov (8) REF_LEFT_BACK<1>:ub REF_LEFT(0)REGION(8,1) // Save the 8 left reference bytes; sub-block 2 reads them back from REF_LEFT_BACK |
||
42 | // Set up pointers to each intra_4x4 prediction mode |
||
43 | // (the mode is the low 4 bits of each of the 4 PRED_MODE words) |
||
44 | and (4) PINTRA4X4_Y<1>:w PRED_MODE<4;4,1>:w 0x0F:w |
||
45 | add (4) INTRA_4X4_MODE(0) r[PINTRA4X4_Y, INTRA_4X4_OFFSET]<1,0>:ub INTRA_MODE<4;4,1>:ub |
||
46 | |||
47 | // Sub-block 0 ***************** |
||
48 | mov (1) INTRA_PRED_AVAIL<1>:w REG_INTRA_PRED_AVAIL<0;1,0>:w // Top/Left neighbor available flags |
||
49 | CALL_1(INTRA_4X4_MODE(0),1) |
||
50 | |||
51 | // Add error data to predicted intra data (add.sat saturates into the byte destination) |
||
52 | ADD_ERROR_SB0: |
||
53 | add.sat (8) r[PPREDBUF_Y,PREDSUBBLK0]<2>:ub r[PERROR,ERRBLK0]<8;4,1>:w REG_INTRA_4X4_PRED<8;8,1>:w // Too bad indexed src can't |
||
54 | add.sat (8) r[PPREDBUF_Y,PREDSUBBLK0+16]<2>:ub r[PERROR,ERRBLK0+32]<8;4,1>:w REG_INTRA_4X4_PRED.8<8;8,1>:w // cross 2 GRFs |
||
55 | |||
56 | // Sub-block 1 ***************** |
||
57 | mov (16) REF_TOP0(0)<1> REF_TOP0(0,4)REGION(8,1) // Top reference data: moved down from REF_TOP0(0,4) to REF_TOP0(0) |
||
58 | mov (4) REF_LEFT(0)<1> r[PPREDBUF_Y,PREDSUBBLK0+6]<8;1,0>:ub // New left reference data from sub-block 0 |
||
59 | or (1) INTRA_PRED_AVAIL<1>:w REG_INTRA_PRED_AVAIL<0;1,0>:w 1:w // Left neighbor is available |
||
60 | CALL_1(INTRA_4X4_MODE(0,1),1) |
||
61 | |||
62 | // Add error data to predicted intra data |
||
63 | ADD_ERROR_SB1: |
||
64 | add.sat (8) r[PPREDBUF_Y,PREDSUBBLK1]<2>:ub r[PERROR,ERRBLK1]<8;4,1>:w REG_INTRA_4X4_PRED<8;8,1>:w // Too bad indexed src can't |
||
65 | add.sat (8) r[PPREDBUF_Y,PREDSUBBLK1+16]<2>:ub r[PERROR,ERRBLK1+32]<8;4,1>:w REG_INTRA_4X4_PRED.8<8;8,1>:w // cross 2 GRFs |
||
66 | |||
67 | // Sub-block 2 ***************** |
||
68 | mov (1) REF_TOP0(0,3)<1> REF_LEFT_BACK.3<0;1,0>:ub // Top-left reference data from stored left reference data |
||
69 | mov (4) REF_TOP0(0,4)<1> r[PPREDBUF_Y,PREDSUBBLK0+24]REGION(4,2):ub // Top reference data |
||
70 | mov (4) REF_TOP0(0,8)<1> r[PPREDBUF_Y,PREDSUBBLK0+24+32]REGION(4,2):ub // Too bad indexed src can't cross 2 GRFs |
||
71 | mov (4) REF_TOP0(0,12)<1> r[PPREDBUF_Y,PREDSUBBLK0+30+32]REGION(1,0):ub // Extended top-right reference data |
||
72 | mov (4) REF_LEFT(0)<1> REF_LEFT_BACK.4<4;4,1>:ub // Left reference data from stored left reference data (REF_LEFT_BACK bytes 4..7) |
||
73 | or (1) INTRA_PRED_AVAIL<1>:w REG_INTRA_PRED_AVAIL<0;1,0>:w 2:w // Top neighbor is available |
||
74 | CALL_1(INTRA_4X4_MODE(0,2),1) |
||
75 | |||
76 | // Add error data to predicted intra data |
||
77 | ADD_ERROR_SB2: |
||
78 | add.sat (8) r[PPREDBUF_Y,PREDSUBBLK2]<2>:ub r[PERROR,ERRBLK2]<8;4,1>:w REG_INTRA_4X4_PRED<8;8,1>:w // Too bad indexed src can't |
||
79 | add.sat (8) r[PPREDBUF_Y,PREDSUBBLK2+16]<2>:ub r[PERROR,ERRBLK2+32]<8;4,1>:w REG_INTRA_4X4_PRED.8<8;8,1>:w // cross 2 GRFs |
||
80 | |||
81 | // Sub-block 3 ***************** |
||
82 | mov (16) REF_TOP0(0)<1> REF_TOP0(0,4)REGION(8,1) // Top reference data: moved down from REF_TOP0(0,4) to REF_TOP0(0) |
||
83 | mov (8) REF_TOP0(0,8)<1> REF_TOP0(0,7)<0;1,0> // Extended top-right reference data (REF_TOP0(0,7) replicated) |
||
84 | mov (4) REF_LEFT(0)<1> r[PPREDBUF_Y,PREDSUBBLK2+6]<8;1,0>:ub // Left reference data from sub-block 2 |
||
85 | or (1) INTRA_PRED_AVAIL<1>:w REG_INTRA_PRED_AVAIL<0;1,0>:w 3:w // Top/Left neighbor are available |
||
86 | CALL_1(INTRA_4X4_MODE(0,3),1) |
||
87 | |||
88 | // Add error data to predicted intra data |
||
89 | ADD_ERROR_SB3: |
||
90 | add.sat (8) r[PPREDBUF_Y,PREDSUBBLK3]<2>:ub r[PERROR,ERRBLK3]<8;4,1>:w REG_INTRA_4X4_PRED<8;8,1>:w // Too bad indexed src can't |
||
91 | add.sat (8) r[PPREDBUF_Y,PREDSUBBLK3+16]<2>:ub r[PERROR,ERRBLK3+32]<8;4,1>:w REG_INTRA_4X4_PRED.8<8;8,1>:w // cross 2 GRFs |
||
92 | |||
93 | RETURN |
||
94 | |||
95 | //-------------------------------------------------------------------------- |
||
96 | // Actual module that performs Intra_4x4 prediction and construction |
||
97 | // |
||
98 | // REF_TOP: Top reference data stored in BYTE with p[-1,-1] at REF_TOP(0,-1) |
||
99 | // REF_LEFT: Left reference data stored in BYTE with p[-1,0] at REF_LEFT(0,0) |
||
100 | // PINTRA4X4_Y: Intra prediction mode |
||
101 | // INTRA_PRED_AVAIL: Top/Left available flag, (Bit0: Left, Bit1: Top) |
||
102 | // |
||
103 | // Output data: |
||
104 | // |
||
105 | // REG_INTRA_4X4_PRED: Predicted 4x4 block data stored in 1 GRF register |
||
106 | //-------------------------------------------------------------------------- |
||
107 | intra_Pred_4x4_Y: |
||
108 | // Mode 0: vertical - the <0;4,1> region repeats the same 4 top reference samples for every row of the 4x4 block |
||
109 | INTRA_4X4_VERTICAL: |
||
110 | mov (16) REG_INTRA_4X4_PRED<1>:w REF_TOP(0)<0;4,1> |
||
111 | RETURN_1 |
||
112 | |||
113 | // Mode 1: horizontal - the <1;4,0> region replicates one left reference sample across each row, advancing one sample per row |
||
114 | INTRA_4X4_HORIZONTAL: |
||
115 | mov (16) REG_INTRA_4X4_PRED<1>:w REF_LEFT(0)<1;4,0> |
||
116 | RETURN_1 |
||
117 | |||
118 | // Mode 2: DC - (sum of 4 top + 4 left reference samples + 4) >> 3, broadcast to all 16 pixels |
||
119 | INTRA_4X4_DC: |
||
120 | // Rearrange reference samples for unified DC prediction code |
||
121 | // f0.0 <- top available (bit 1 of INTRA_PRED_AVAIL), f0.1 <- left available (bit 0) |
||
122 | and.nz.f0.0 (16) NULLREG INTRA_PRED_AVAIL<0;1,0>:w 2:w {Compr} |
||
123 | and.nz.f0.1 (16) NULLREG INTRA_PRED_AVAIL<0;1,0>:w 1:w {Compr} |
||
124 | (-f0.0.any16h) mov (16) REF_TOP_W(0)<1> 0x8080:uw // Top macroblock not available for intra prediction: fill REF_TOP_W with 0x8080 |
||
125 | (-f0.1.any8h) mov (8) REF_LEFT(0)<1> REF_TOP(0)REGION(8,1) // Left macroblock not available for intra prediction: use top reference as left |
||
126 | (-f0.0.any8h) mov (8) REF_TOP(0)<1> REF_LEFT(0)REGION(8,1) // Top macroblock not available for intra prediction: use left reference as top |
||
127 | // Perform DC prediction |
||
128 | // (4 top+left pair sums -> 2 partial sums -> total broadcast in acc0, then +4 and >>3) |
||
129 | add (4) PRED_YW(15)<1> REF_TOP(0)REGION(4,1) REF_LEFT(0)REGION(4,1) |
||
130 | add (2) PRED_YW(15)<1> PRED_YW(15)REGION(2,1) PRED_YW(15,2)REGION(2,1) |
||
131 | add (16) acc0<1>:w PRED_YW(15)REGION(1,0) PRED_YW(15,1)REGION(1,0) |
||
132 | add (16) acc0<1>:w acc0:w 4:w |
||
133 | shr (16) REG_INTRA_4X4_PRED<1>:w acc0:w 3:w |
||
134 | RETURN_1 |
||
135 | |||
136 | // Mode 3: diagonal down-left - (p[x]+2*p[x+1]+p[x+2]+2)>>2 over a copy of the top reference row |
||
137 | INTRA_4X4_DIAG_DOWN_LEFT: |
||
138 | mov (8) INTRA_REF<1>:ub REF_TOP(0)REGION(8,1) // Keep REF_TOP untouched for future use |
||
139 | mov (4) INTRA_REF.8<1>:ub REF_TOP(0,7)REGION(4,1) // p[8,-1] = p[7,-1] |
||
140 | add (8) acc0<1>:w INTRA_REF.2<8;8,1> 2:w // p[x+2]+2 |
||
141 | mac (8) acc0<1>:w INTRA_REF.1<8;8,1> 2:w // 2*p[x+1]+p[x+2]+2 |
||
142 | mac (8) PRED_YW(15)<1> INTRA_REF.0<8;8,1> 1:w // p[x]+2*p[x+1]+p[x+2]+2 |
||
143 | |||
144 | shr (16) REG_INTRA_4X4_PRED<1>:w PRED_YW(15)<1;4,1> 2:w // (p[x]+2*p[x+1]+p[x+2]+2)>>2, each row starts one filtered sample later (<1;4,1>) |
||
145 | RETURN_1 |
||
146 | |||
147 | // Mode 4: diagonal down-right - (p[x]+2*p[x+1]+p[x+2]+2)>>2 over the combined left/top reference data in INTRA_REF |
||
148 | INTRA_4X4_DIAG_DOWN_RIGHT: |
||
149 | |||
150 | // Set inverse shift count |
||
151 | shl (4) REF_TMP<1>:ud REF_LEFT_D(0)REGION(1,0) INV_SHIFT<4;4,1>:b |
||
152 | mov (8) INTRA_REF.4<1>:ub REF_TOP(0,-1)REGION(8,1) // INTRA_REF holds all reference data |
||
153 | mov (4) INTRA_REF<1>:ub REF_TMP.3<16;4,4>:ub |
||
154 | |||
155 | add (8) acc0<1>:w INTRA_REF.2<8;8,1>:ub 2:w // p[x+2]+2 |
||
156 | mac (8) acc0<1>:w INTRA_REF.1<8;8,1>:ub 2:w // 2*p[x+1]+p[x+2]+2 |
||
157 | mac (8) INTRA_REF<1>:w INTRA_REF<8;8,1>:ub 1:w // p[x]+2*p[x+1]+p[x+2]+2 |
||
158 | |||
159 | // Store data in reversed order (read back through the PBWDCOPY_4 indirect addresses, then >>2) |
||
160 | add (4) PBWDCOPY_4<1>:w INV_TRANS4<4;4,1>:b INTRA_TEMP_2*GRFWIB:w // Must match with INTRA_REF |
||
161 | shr (16) REG_INTRA_4X4_PRED<1>:w r[PBWDCOPY_4]<4,1>:w 2:w |
||
162 | RETURN_1 |
||
163 | |||
164 | // Mode 5: vertical-right - even rows use avg of adjacent samples, odd rows use the (1,2,1) filter with +2 rounding and >>2 |
||
165 | INTRA_4X4_VERT_RIGHT: |
||
166 | |||
167 | // Set inverse shift count |
||
168 | shl (4) REF_TMP<1>:ud REF_LEFT_D(0)REGION(1,0) INV_SHIFT<4;4,1>:b |
||
169 | mov (8) INTRA_REF.4<1>:ub REF_TOP(0,-1)REGION(8,1) // INTRA_REF holds all reference data |
||
170 | mov (4) INTRA_REF<1>:ub REF_TMP.3<16;4,4>:ub |
||
171 | |||
172 | // Even rows |
||
173 | avg (8) PRED_YW(14)<1> INTRA_REF.4<8;8,1> INTRA_REF.5<8;8,1> // avg(p[x-1],p[x]) |
||
174 | // Odd rows |
||
175 | add (8) acc0<1>:w INTRA_REF.3<8;8,1>:ub 2:w // p[x]+2 |
||
176 | mac (8) acc0<1>:w INTRA_REF.2<8;8,1>:ub 2:w // 2*p[x-1]+p[x]+2 |
||
177 | mac (8) acc0<1>:w INTRA_REF.1<8;8,1>:ub 1:w // p[x-2]+2*p[x-1]+p[x]+2 |
||
178 | shr (8) INTRA_REF<1>:w acc0:w 2:w // (p[x-2]+2*p[x-1]+p[x]+2)>>2 |
||
179 | |||
180 | mov (4) INTRA_REF.2<2>:w INTRA_REF.2<4;4,1>:w // Keep zVR = -2,-3 unchanged |
||
181 | mov (4) INTRA_REF.3<2>:w PRED_YW(14)REGION(4,1) // Combining even rows |
||
182 | |||
183 | add (4) PBWDCOPY_4<1>:w INV_TRANS4<4;4,1>:b INTRA_TEMP_2*GRFWIB:w // Must match with INTRA_REF |
||
184 | mov (16) REG_INTRA_4X4_PRED<1>:w r[PBWDCOPY_4]<4,2>:w |
||
185 | RETURN_1 |
||
186 | |||
187 | // Mode 6: horizontal-down - even pixels use avg of adjacent samples, odd pixels use the (1,2,1) filter with +2 rounding and >>2 |
||
188 | INTRA_4X4_HOR_DOWN: |
||
189 | // Set inverse shift count |
||
190 | shl (4) REF_TMP<1>:ud REF_LEFT_D(0)REGION(1,0) INV_SHIFT<4;4,1>:b |
||
191 | mov (8) INTRA_REF.4<1>:ub REF_TOP(0,-1)REGION(8,1) // INTRA_REF holds all reference data |
||
192 | mov (4) INTRA_REF<1>:ub REF_TMP.3<16;4,4>:ub |
||
193 | |||
194 | // Even pixels |
||
195 | avg (8) PRED_YW(14)<1> INTRA_REF<8;8,1> INTRA_REF.1<8;8,1> // avg(p[y-1],p[y]) |
||
196 | // Odd pixels |
||
197 | add (8) acc0<1>:w INTRA_REF.2<8;8,1>:ub 2:w // p[y]+2 |
||
198 | mac (8) acc0<1>:w INTRA_REF.1<8;8,1>:ub 2:w // 2*p[y-1]+p[y]+2 |
||
199 | mac (8) REF_TMP<1>:w INTRA_REF.0<8;8,1>:ub 1:w // p[y-2]+2*p[y-1]+p[y]+2 |
||
200 | shr (4) INTRA_REF.1<2>:w REF_TMP<4;4,1>:w 2:w // (p[y-2]+2*p[y-1]+p[y]+2)>>2 |
||
201 | |||
202 | shr (2) INTRA_REF.8<1>:w REF_TMP.4<2;2,1>:w 2:w // >>2 of the two remaining filter sums; presumably the zHD = -2,-3 entries (comment previously said "Keep zVR = -2,-3 unchanged") - NOTE(review): confirm |
||
203 | mov (4) INTRA_REF.0<2>:w PRED_YW(14)REGION(4,1) // Combining even pixels |
||
204 | |||
205 | shl (4) PBWDCOPY_4<1>:w INV_TRANS4<4;4,1>:b 1:w // Convert to WORD offset |
||
206 | add (4) PBWDCOPY_4<1>:w PBWDCOPY_4<4;4,1>:w INTRA_TEMP_2*GRFWIB:w // Must match with INTRA_REF |
||
207 | mov (16) REG_INTRA_4X4_PRED<1>:w r[PBWDCOPY_4]<4,1>:w |
||
208 | RETURN_1 |
||
209 | |||
210 | // Mode 7: vertical-left - avg results and (1,2,1)-filtered results of the top reference are interleaved in PRED_YW(14) |
||
211 | INTRA_4X4_VERT_LEFT: |
||
212 | // Even rows (written to the even words of PRED_YW(14), destination stride 2) |
||
213 | avg (8) PRED_YW(14)<2> REF_TOP(0)REGION(8,1) REF_TOP(0,1)REGION(8,1) // avg(p[x],p[x+1]) |
||
214 | // Odd rows (written to the odd words of PRED_YW(14) by the shr below) |
||
215 | add (8) acc0<1>:w REF_TOP(0,2)REGION(8,1) 2:w // p[x+2]+2 |
||
216 | mac (8) acc0<1>:w REF_TOP(0,1)REGION(8,1) 2:w // 2*p[x+1]+p[x+2]+2 |
||
217 | mac (8) PRED_YW(15)<1> REF_TOP(0)REGION(8,1) 1:w // p[x]+2*p[x+1]+p[x+2]+2 |
||
218 | shr (8) PRED_YW(14,1)<2> PRED_YW(15)REGION(8,1) 2:w |
||
219 | |||
220 | mov (16) REG_INTRA_4X4_PRED<1>:w PRED_YW(14)<1;4,2> |
||
221 | RETURN_1 |
||
222 | |||
223 | // Mode 8: horizontal-up - avg results and (1,2,1)-filtered results of the left reference are interleaved in PRED_YW(14) |
||
224 | INTRA_4X4_HOR_UP: |
||
225 | // Set extra left reference pixels for unified prediction |
||
226 | mov (8) REF_LEFT(0,4)<1> REF_LEFT(0,3)REGION(1,0) // Copy p[-1,3] to p[-1,y],y=4...7 |
||
227 | // Even pixels (written to the even words of PRED_YW(14), destination stride 2) |
||
228 | avg (8) PRED_YW(14)<2> REF_LEFT(0)REGION(8,1) REF_LEFT(0,1)REGION(8,1) // avg(p[y],p[y+1]) |
||
229 | // Odd pixels (written to the odd words of PRED_YW(14) by the shr below) |
||
230 | add (8) acc0<1>:w REF_LEFT(0,2)REGION(8,1) 2:w // p[y+2]+2 |
||
231 | mac (8) acc0<1>:w REF_LEFT(0,1)REGION(8,1) 2:w // 2*p[y+1]+p[y+2]+2 |
||
232 | mac (8) PRED_YW(15)<1> REF_LEFT(0)REGION(8,1) 1:w // p[y]+2*p[y+1]+p[y+2]+2 |
||
233 | shr (8) PRED_YW(14,1)<2> PRED_YW(15)REGION(8,1) 2:w // (p[y]+2*p[y+1]+p[y+2]+2)>>2 |
||
234 | |||
235 | mov (16) REG_INTRA_4X4_PRED<1>:w PRED_YW(14)<2;4,1> |
||
236 | RETURN_1 |
||
237 | |||
238 | // End of intra_Pred_4x4_Y_4 |
||
239 | |||
240 | #endif // !defined(__INTRA_PRED_4X4_Y_4__) |