Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
 
24
#include "vc4_qpu.h"
25
 
26
static bool
27
writes_reg(uint64_t inst, uint32_t w)
28
{
29
        return (QPU_GET_FIELD(inst, QPU_WADDR_ADD) == w ||
30
                QPU_GET_FIELD(inst, QPU_WADDR_MUL) == w);
31
}
32
 
33
static bool
34
_reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b)
35
{
36
        struct {
37
                uint32_t mux, addr;
38
        } src_regs[] = {
39
                { QPU_GET_FIELD(inst, QPU_ADD_A) },
40
                { QPU_GET_FIELD(inst, QPU_ADD_B) },
41
                { QPU_GET_FIELD(inst, QPU_MUL_A) },
42
                { QPU_GET_FIELD(inst, QPU_MUL_B) },
43
        };
44
 
45
        for (int i = 0; i < ARRAY_SIZE(src_regs); i++) {
46
                if (!ignore_a &&
47
                    src_regs[i].mux == QPU_MUX_A &&
48
                    (QPU_GET_FIELD(inst, QPU_RADDR_A) == r))
49
                        return true;
50
 
51
                if (!ignore_b &&
52
                    QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM &&
53
                    src_regs[i].mux == QPU_MUX_B &&
54
                    (QPU_GET_FIELD(inst, QPU_RADDR_B) == r))
55
                        return true;
56
        }
57
 
58
        return false;
59
}
60
 
61
/**
 * Reports whether @inst reads address @r through either the QPU_MUX_A or
 * the QPU_MUX_B input path (neither path is ignored).
 */
static bool
reads_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = false;
        const bool skip_b = false;

        return _reads_reg(inst, r, skip_a, skip_b);
}
66
 
67
/**
 * Reports whether @inst reads address @r through the QPU_MUX_A input path
 * only; the QPU_MUX_B path is ignored.
 */
static bool
reads_a_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = false;
        const bool skip_b = true;

        return _reads_reg(inst, r, skip_a, skip_b);
}
72
 
73
/**
 * Reports whether @inst reads address @r through the QPU_MUX_B input path
 * only; the QPU_MUX_A path is ignored.
 */
static bool
reads_b_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = true;
        const bool skip_b = false;

        return _reads_reg(inst, r, skip_a, skip_b);
}
78
 
79
static bool
80
writes_sfu(uint64_t inst)
81
{
82
        return (writes_reg(inst, QPU_W_SFU_RECIP) ||
83
                writes_reg(inst, QPU_W_SFU_RECIPSQRT) ||
84
                writes_reg(inst, QPU_W_SFU_EXP) ||
85
                writes_reg(inst, QPU_W_SFU_LOG));
86
}
87
 
88
/**
89
 * Checks for the instruction restrictions from page 37 ("Summary of
90
 * Instruction Restrictions").
91
 */
92
void
93
vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
94
{
95
        bool scoreboard_locked = false;
96
 
97
        for (int i = 0; i < num_inst; i++) {
98
                uint64_t inst = insts[i];
99
 
100
                if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END) {
101
                        if (qpu_inst_is_tlb(inst))
102
                                scoreboard_locked = true;
103
 
104
                        continue;
105
                }
106
 
107
                /* "The Thread End instruction must not write to either physical
108
                 *  regfile A or B."
109
                 */
110
                assert(QPU_GET_FIELD(inst, QPU_WADDR_ADD) >= 32);
111
                assert(QPU_GET_FIELD(inst, QPU_WADDR_MUL) >= 32);
112
 
113
                /* Can't trigger an implicit wait on scoreboard in the program
114
                 * end instruction.
115
                 */
116
                assert(!qpu_inst_is_tlb(inst) || scoreboard_locked);
117
 
118
                /* Two delay slots will be executed. */
119
                assert(i + 2 <= num_inst);
120
 
121
                 for (int j = i; j < i + 2; j++) {
122
                         /* "The last three instructions of any program
123
                          *  (Thread End plus the following two delay-slot
124
                          *  instructions) must not do varyings read, uniforms
125
                          *  read or any kind of VPM, VDR, or VDW read or
126
                          *  write."
127
                          */
128
                         assert(!writes_reg(insts[j], QPU_W_VPM));
129
                         assert(!reads_reg(insts[j], QPU_R_VARY));
130
                         assert(!reads_reg(insts[j], QPU_R_UNIF));
131
                         assert(!reads_reg(insts[j], QPU_R_VPM));
132
 
133
                         /* "The Thread End instruction and the following two
134
                          *  delay slot instructions must not write or read
135
                          *  address 14 in either regfile A or B."
136
                          */
137
                         assert(!writes_reg(insts[j], 14));
138
                         assert(!reads_reg(insts[j], 14));
139
 
140
                 }
141
 
142
                 /* "The final program instruction (the second delay slot
143
                  *  instruction) must not do a TLB Z write."
144
                  */
145
                 assert(!writes_reg(insts[i + 2], QPU_W_TLB_Z));
146
        }
147
 
148
        /* "A scoreboard wait must not occur in the first two instructions of
149
         *  a fragment shader. This is either the explicit Wait for Scoreboard
150
         *  signal or an implicit wait with the first tile-buffer read or
151
         *  write instruction."
152
         */
153
        for (int i = 0; i < 2; i++) {
154
                uint64_t inst = insts[i];
155
 
156
                assert(!qpu_inst_is_tlb(inst));
157
        }
158
 
159
        /* "If TMU_NOSWAP is written, the write must be three instructions
160
         *  before the first TMU write instruction.  For example, if
161
         *  TMU_NOSWAP is written in the first shader instruction, the first
162
         *  TMU write cannot occur before the 4th shader instruction."
163
         */
164
        int last_tmu_noswap = -10;
165
        for (int i = 0; i < num_inst; i++) {
166
                uint64_t inst = insts[i];
167
 
168
                assert((i - last_tmu_noswap) > 3 ||
169
                       (!writes_reg(inst, QPU_W_TMU0_S) &&
170
                        !writes_reg(inst, QPU_W_TMU1_S)));
171
 
172
                if (writes_reg(inst, QPU_W_TMU_NOSWAP))
173
                    last_tmu_noswap = i;
174
        }
175
 
176
        /* "An instruction must not read from a location in physical regfile A
177
         *  or B that was written to by the previous instruction."
178
         */
179
        for (int i = 0; i < num_inst - 1; i++) {
180
                uint64_t inst = insts[i];
181
                uint32_t add_waddr = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
182
                uint32_t mul_waddr = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
183
                uint32_t waddr_a, waddr_b;
184
 
185
                if (inst & QPU_WS) {
186
                        waddr_b = add_waddr;
187
                        waddr_a = mul_waddr;
188
                } else {
189
                        waddr_a = add_waddr;
190
                        waddr_b = mul_waddr;
191
                }
192
 
193
                assert(waddr_a >= 32 || !reads_a_reg(insts[i + 1], waddr_a));
194
                assert(waddr_b >= 32 || !reads_b_reg(insts[i + 1], waddr_b));
195
        }
196
 
197
        /* "After an SFU lookup instruction, accumulator r4 must not be read
198
         *  in the following two instructions. Any other instruction that
199
         *  results in r4 being written (that is, TMU read, TLB read, SFU
200
         *  lookup) cannot occur in the two instructions following an SFU
201
         *  lookup."
202
         */
203
        int last_sfu_inst = -10;
204
        for (int i = 0; i < num_inst - 1; i++) {
205
                uint64_t inst = insts[i];
206
                uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
207
 
208
                assert(i - last_sfu_inst > 2 ||
209
                       (!writes_sfu(inst) &&
210
                        sig != QPU_SIG_LOAD_TMU0 &&
211
                        sig != QPU_SIG_LOAD_TMU1 &&
212
                        sig != QPU_SIG_COLOR_LOAD));
213
 
214
                if (writes_sfu(inst))
215
                        last_sfu_inst = i;
216
        }
217
 
218
        int last_r5_write = -10;
219
        for (int i = 0; i < num_inst - 1; i++) {
220
                uint64_t inst = insts[i];
221
 
222
                /* "An instruction that does a vector rotate by r5 must not
223
                 *  immediately follow an instruction that writes to r5."
224
                 */
225
                assert(last_r5_write != i - 1 ||
226
                       QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM ||
227
                       QPU_GET_FIELD(inst, QPU_SMALL_IMM) != 48);
228
        }
229
 
230
        /* "An instruction that does a vector rotate must not immediately
231
         *  follow an instruction that writes to the accumulator that is being
232
         *  rotated.
233
         *
234
         * XXX: TODO.
235
         */
236
 
237
        /* "After an instruction that does a TLB Z write, the multisample mask
238
         *  must not be read as an instruction input argument in the following
239
         *  two instruction. The TLB Z write instruction can, however, be
240
         *  followed immediately by a TLB color write."
241
         */
242
        for (int i = 0; i < num_inst - 1; i++) {
243
                uint64_t inst = insts[i];
244
                if (writes_reg(inst, QPU_W_TLB_Z)) {
245
                        assert(!reads_a_reg(insts[i + 1], QPU_R_MS_REV_FLAGS));
246
                        assert(!reads_a_reg(insts[i + 2], QPU_R_MS_REV_FLAGS));
247
                }
248
        }
249
 
250
        /*
251
         * "A single instruction can only perform a maximum of one of the
252
         *  following closely coupled peripheral accesses in a single
253
         *  instruction: TMU write, TMU read, TLB write, TLB read, TLB
254
         *  combined color read and write, SFU write, Mutex read or Semaphore
255
         *  access."
256
         */
257
        for (int i = 0; i < num_inst - 1; i++) {
258
                uint64_t inst = insts[i];
259
 
260
                assert(qpu_num_sf_accesses(inst) <= 1);
261
        }
262
}