Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright © 2014 Broadcom |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
21 | * IN THE SOFTWARE. |
||
22 | */ |
||
23 | |||
24 | #include "vc4_qpu.h" |
||
25 | |||
26 | static bool |
||
27 | writes_reg(uint64_t inst, uint32_t w) |
||
28 | { |
||
29 | return (QPU_GET_FIELD(inst, QPU_WADDR_ADD) == w || |
||
30 | QPU_GET_FIELD(inst, QPU_WADDR_MUL) == w); |
||
31 | } |
||
32 | |||
33 | static bool |
||
34 | _reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b) |
||
35 | { |
||
36 | struct { |
||
37 | uint32_t mux, addr; |
||
38 | } src_regs[] = { |
||
39 | { QPU_GET_FIELD(inst, QPU_ADD_A) }, |
||
40 | { QPU_GET_FIELD(inst, QPU_ADD_B) }, |
||
41 | { QPU_GET_FIELD(inst, QPU_MUL_A) }, |
||
42 | { QPU_GET_FIELD(inst, QPU_MUL_B) }, |
||
43 | }; |
||
44 | |||
45 | for (int i = 0; i < ARRAY_SIZE(src_regs); i++) { |
||
46 | if (!ignore_a && |
||
47 | src_regs[i].mux == QPU_MUX_A && |
||
48 | (QPU_GET_FIELD(inst, QPU_RADDR_A) == r)) |
||
49 | return true; |
||
50 | |||
51 | if (!ignore_b && |
||
52 | QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM && |
||
53 | src_regs[i].mux == QPU_MUX_B && |
||
54 | (QPU_GET_FIELD(inst, QPU_RADDR_B) == r)) |
||
55 | return true; |
||
56 | } |
||
57 | |||
58 | return false; |
||
59 | } |
||
60 | |||
/**
 * Reports whether inst reads address r, considering both the regfile A and
 * the regfile B read paths.
 */
static bool
reads_reg(uint64_t inst, uint32_t r)
{
        const bool skip_regfile_a = false;
        const bool skip_regfile_b = false;

        return _reads_reg(inst, r, skip_regfile_a, skip_regfile_b);
}
||
66 | |||
/**
 * Reports whether inst reads address r through the regfile A read path
 * only; the regfile B path is ignored.
 */
static bool
reads_a_reg(uint64_t inst, uint32_t r)
{
        const bool skip_regfile_a = false;
        const bool skip_regfile_b = true;

        return _reads_reg(inst, r, skip_regfile_a, skip_regfile_b);
}
||
72 | |||
/**
 * Reports whether inst reads address r through the regfile B read path
 * only; the regfile A path is ignored.
 */
static bool
reads_b_reg(uint64_t inst, uint32_t r)
{
        const bool skip_regfile_a = true;
        const bool skip_regfile_b = false;

        return _reads_reg(inst, r, skip_regfile_a, skip_regfile_b);
}
||
78 | |||
79 | static bool |
||
80 | writes_sfu(uint64_t inst) |
||
81 | { |
||
82 | return (writes_reg(inst, QPU_W_SFU_RECIP) || |
||
83 | writes_reg(inst, QPU_W_SFU_RECIPSQRT) || |
||
84 | writes_reg(inst, QPU_W_SFU_EXP) || |
||
85 | writes_reg(inst, QPU_W_SFU_LOG)); |
||
86 | } |
||
87 | |||
88 | /** |
||
89 | * Checks for the instruction restrictions from page 37 ("Summary of |
||
90 | * Instruction Restrictions"). |
||
91 | */ |
||
92 | void |
||
93 | vc4_qpu_validate(uint64_t *insts, uint32_t num_inst) |
||
94 | { |
||
95 | bool scoreboard_locked = false; |
||
96 | |||
97 | for (int i = 0; i < num_inst; i++) { |
||
98 | uint64_t inst = insts[i]; |
||
99 | |||
100 | if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END) { |
||
101 | if (qpu_inst_is_tlb(inst)) |
||
102 | scoreboard_locked = true; |
||
103 | |||
104 | continue; |
||
105 | } |
||
106 | |||
107 | /* "The Thread End instruction must not write to either physical |
||
108 | * regfile A or B." |
||
109 | */ |
||
110 | assert(QPU_GET_FIELD(inst, QPU_WADDR_ADD) >= 32); |
||
111 | assert(QPU_GET_FIELD(inst, QPU_WADDR_MUL) >= 32); |
||
112 | |||
113 | /* Can't trigger an implicit wait on scoreboard in the program |
||
114 | * end instruction. |
||
115 | */ |
||
116 | assert(!qpu_inst_is_tlb(inst) || scoreboard_locked); |
||
117 | |||
118 | /* Two delay slots will be executed. */ |
||
119 | assert(i + 2 <= num_inst); |
||
120 | |||
121 | for (int j = i; j < i + 2; j++) { |
||
122 | /* "The last three instructions of any program |
||
123 | * (Thread End plus the following two delay-slot |
||
124 | * instructions) must not do varyings read, uniforms |
||
125 | * read or any kind of VPM, VDR, or VDW read or |
||
126 | * write." |
||
127 | */ |
||
128 | assert(!writes_reg(insts[j], QPU_W_VPM)); |
||
129 | assert(!reads_reg(insts[j], QPU_R_VARY)); |
||
130 | assert(!reads_reg(insts[j], QPU_R_UNIF)); |
||
131 | assert(!reads_reg(insts[j], QPU_R_VPM)); |
||
132 | |||
133 | /* "The Thread End instruction and the following two |
||
134 | * delay slot instructions must not write or read |
||
135 | * address 14 in either regfile A or B." |
||
136 | */ |
||
137 | assert(!writes_reg(insts[j], 14)); |
||
138 | assert(!reads_reg(insts[j], 14)); |
||
139 | |||
140 | } |
||
141 | |||
142 | /* "The final program instruction (the second delay slot |
||
143 | * instruction) must not do a TLB Z write." |
||
144 | */ |
||
145 | assert(!writes_reg(insts[i + 2], QPU_W_TLB_Z)); |
||
146 | } |
||
147 | |||
148 | /* "A scoreboard wait must not occur in the first two instructions of |
||
149 | * a fragment shader. This is either the explicit Wait for Scoreboard |
||
150 | * signal or an implicit wait with the first tile-buffer read or |
||
151 | * write instruction." |
||
152 | */ |
||
153 | for (int i = 0; i < 2; i++) { |
||
154 | uint64_t inst = insts[i]; |
||
155 | |||
156 | assert(!qpu_inst_is_tlb(inst)); |
||
157 | } |
||
158 | |||
159 | /* "If TMU_NOSWAP is written, the write must be three instructions |
||
160 | * before the first TMU write instruction. For example, if |
||
161 | * TMU_NOSWAP is written in the first shader instruction, the first |
||
162 | * TMU write cannot occur before the 4th shader instruction." |
||
163 | */ |
||
164 | int last_tmu_noswap = -10; |
||
165 | for (int i = 0; i < num_inst; i++) { |
||
166 | uint64_t inst = insts[i]; |
||
167 | |||
168 | assert((i - last_tmu_noswap) > 3 || |
||
169 | (!writes_reg(inst, QPU_W_TMU0_S) && |
||
170 | !writes_reg(inst, QPU_W_TMU1_S))); |
||
171 | |||
172 | if (writes_reg(inst, QPU_W_TMU_NOSWAP)) |
||
173 | last_tmu_noswap = i; |
||
174 | } |
||
175 | |||
176 | /* "An instruction must not read from a location in physical regfile A |
||
177 | * or B that was written to by the previous instruction." |
||
178 | */ |
||
179 | for (int i = 0; i < num_inst - 1; i++) { |
||
180 | uint64_t inst = insts[i]; |
||
181 | uint32_t add_waddr = QPU_GET_FIELD(inst, QPU_WADDR_ADD); |
||
182 | uint32_t mul_waddr = QPU_GET_FIELD(inst, QPU_WADDR_MUL); |
||
183 | uint32_t waddr_a, waddr_b; |
||
184 | |||
185 | if (inst & QPU_WS) { |
||
186 | waddr_b = add_waddr; |
||
187 | waddr_a = mul_waddr; |
||
188 | } else { |
||
189 | waddr_a = add_waddr; |
||
190 | waddr_b = mul_waddr; |
||
191 | } |
||
192 | |||
193 | assert(waddr_a >= 32 || !reads_a_reg(insts[i + 1], waddr_a)); |
||
194 | assert(waddr_b >= 32 || !reads_b_reg(insts[i + 1], waddr_b)); |
||
195 | } |
||
196 | |||
197 | /* "After an SFU lookup instruction, accumulator r4 must not be read |
||
198 | * in the following two instructions. Any other instruction that |
||
199 | * results in r4 being written (that is, TMU read, TLB read, SFU |
||
200 | * lookup) cannot occur in the two instructions following an SFU |
||
201 | * lookup." |
||
202 | */ |
||
203 | int last_sfu_inst = -10; |
||
204 | for (int i = 0; i < num_inst - 1; i++) { |
||
205 | uint64_t inst = insts[i]; |
||
206 | uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); |
||
207 | |||
208 | assert(i - last_sfu_inst > 2 || |
||
209 | (!writes_sfu(inst) && |
||
210 | sig != QPU_SIG_LOAD_TMU0 && |
||
211 | sig != QPU_SIG_LOAD_TMU1 && |
||
212 | sig != QPU_SIG_COLOR_LOAD)); |
||
213 | |||
214 | if (writes_sfu(inst)) |
||
215 | last_sfu_inst = i; |
||
216 | } |
||
217 | |||
218 | int last_r5_write = -10; |
||
219 | for (int i = 0; i < num_inst - 1; i++) { |
||
220 | uint64_t inst = insts[i]; |
||
221 | |||
222 | /* "An instruction that does a vector rotate by r5 must not |
||
223 | * immediately follow an instruction that writes to r5." |
||
224 | */ |
||
225 | assert(last_r5_write != i - 1 || |
||
226 | QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM || |
||
227 | QPU_GET_FIELD(inst, QPU_SMALL_IMM) != 48); |
||
228 | } |
||
229 | |||
230 | /* "An instruction that does a vector rotate must not immediately |
||
231 | * follow an instruction that writes to the accumulator that is being |
||
232 | * rotated. |
||
233 | * |
||
234 | * XXX: TODO. |
||
235 | */ |
||
236 | |||
237 | /* "After an instruction that does a TLB Z write, the multisample mask |
||
238 | * must not be read as an instruction input argument in the following |
||
239 | * two instruction. The TLB Z write instruction can, however, be |
||
240 | * followed immediately by a TLB color write." |
||
241 | */ |
||
242 | for (int i = 0; i < num_inst - 1; i++) { |
||
243 | uint64_t inst = insts[i]; |
||
244 | if (writes_reg(inst, QPU_W_TLB_Z)) { |
||
245 | assert(!reads_a_reg(insts[i + 1], QPU_R_MS_REV_FLAGS)); |
||
246 | assert(!reads_a_reg(insts[i + 2], QPU_R_MS_REV_FLAGS)); |
||
247 | } |
||
248 | } |
||
249 | |||
250 | /* |
||
251 | * "A single instruction can only perform a maximum of one of the |
||
252 | * following closely coupled peripheral accesses in a single |
||
253 | * instruction: TMU write, TMU read, TLB write, TLB read, TLB |
||
254 | * combined color read and write, SFU write, Mutex read or Semaphore |
||
255 | * access." |
||
256 | */ |
||
257 | for (int i = 0; i < num_inst - 1; i++) { |
||
258 | uint64_t inst = insts[i]; |
||
259 | |||
260 | assert(qpu_num_sf_accesses(inst) <= 1); |
||
261 | } |
||
262 | }=>>>>>>>>>=>>> |