Subversion Repositories Kolibri OS

Rev

Rev 5354 | Rev 6088 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 5354 Rev 6084
1
/*
1
/*
2
 * Copyright © 2014 Intel Corporation
2
 * Copyright © 2014 Intel Corporation
3
 *
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
10
 *
11
 * The above copyright notice and this permission notice (including the next
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
13
 * Software.
14
 *
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
21
 * IN THE SOFTWARE.
22
 *
22
 *
23
 * Authors:
23
 * Authors:
24
 *    Ben Widawsky 
24
 *    Ben Widawsky 
25
 *    Michel Thierry 
25
 *    Michel Thierry 
26
 *    Thomas Daniel 
26
 *    Thomas Daniel 
27
 *    Oscar Mateo 
27
 *    Oscar Mateo 
28
 *
28
 *
29
 */
29
 */
30
 
30
 
31
/**
31
/**
32
 * DOC: Logical Rings, Logical Ring Contexts and Execlists
32
 * DOC: Logical Rings, Logical Ring Contexts and Execlists
33
 *
33
 *
34
 * Motivation:
34
 * Motivation:
35
 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
35
 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36
 * These expanded contexts enable a number of new abilities, especially
36
 * These expanded contexts enable a number of new abilities, especially
37
 * "Execlists" (also implemented in this file).
37
 * "Execlists" (also implemented in this file).
38
 *
38
 *
39
 * One of the main differences with the legacy HW contexts is that logical
39
 * One of the main differences with the legacy HW contexts is that logical
40
 * ring contexts incorporate many more things to the context's state, like
40
 * ring contexts incorporate many more things to the context's state, like
41
 * PDPs or ringbuffer control registers:
41
 * PDPs or ringbuffer control registers:
42
 *
42
 *
43
 * The reason why PDPs are included in the context is straightforward: as
43
 * The reason why PDPs are included in the context is straightforward: as
44
 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
44
 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45
 * contained there means you don't need to do a ppgtt->switch_mm yourself,
45
 * contained there means you don't need to do a ppgtt->switch_mm yourself,
46
 * instead, the GPU will do it for you on the context switch.
46
 * instead, the GPU will do it for you on the context switch.
47
 *
47
 *
48
 * But what about the ringbuffer control registers (head, tail, etc.)?
48
 * But what about the ringbuffer control registers (head, tail, etc.)?
49
 * Shouldn't we just need a set of those per engine command streamer? This is
49
 * Shouldn't we just need a set of those per engine command streamer? This is
50
 * where the name "Logical Rings" starts to make sense: by virtualizing the
50
 * where the name "Logical Rings" starts to make sense: by virtualizing the
51
 * rings, the engine cs shifts to a new "ring buffer" with every context
51
 * rings, the engine cs shifts to a new "ring buffer" with every context
52
 * switch. When you want to submit a workload to the GPU you: A) choose your
52
 * switch. When you want to submit a workload to the GPU you: A) choose your
53
 * context, B) find its appropriate virtualized ring, C) write commands to it
53
 * context, B) find its appropriate virtualized ring, C) write commands to it
54
 * and then, finally, D) tell the GPU to switch to that context.
54
 * and then, finally, D) tell the GPU to switch to that context.
55
 *
55
 *
56
 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
56
 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57
 * to a context is via a context execution list, ergo "Execlists".
57
 * to a context is via a context execution list, ergo "Execlists".
58
 *
58
 *
59
 * LRC implementation:
59
 * LRC implementation:
60
 * Regarding the creation of contexts, we have:
60
 * Regarding the creation of contexts, we have:
61
 *
61
 *
62
 * - One global default context.
62
 * - One global default context.
63
 * - One local default context for each opened fd.
63
 * - One local default context for each opened fd.
64
 * - One local extra context for each context create ioctl call.
64
 * - One local extra context for each context create ioctl call.
65
 *
65
 *
66
 * Now that ringbuffers belong per-context (and not per-engine, like before)
66
 * Now that ringbuffers belong per-context (and not per-engine, like before)
67
 * and that contexts are uniquely tied to a given engine (and not reusable,
67
 * and that contexts are uniquely tied to a given engine (and not reusable,
68
 * like before) we need:
68
 * like before) we need:
69
 *
69
 *
70
 * - One ringbuffer per-engine inside each context.
70
 * - One ringbuffer per-engine inside each context.
71
 * - One backing object per-engine inside each context.
71
 * - One backing object per-engine inside each context.
72
 *
72
 *
73
 * The global default context starts its life with these new objects fully
73
 * The global default context starts its life with these new objects fully
74
 * allocated and populated. The local default context for each opened fd is
74
 * allocated and populated. The local default context for each opened fd is
75
 * more complex, because we don't know at creation time which engine is going
75
 * more complex, because we don't know at creation time which engine is going
76
 * to use them. To handle this, we have implemented a deferred creation of LR
76
 * to use them. To handle this, we have implemented a deferred creation of LR
77
 * contexts:
77
 * contexts:
78
 *
78
 *
79
 * The local context starts its life as a hollow or blank holder, that only
79
 * The local context starts its life as a hollow or blank holder, that only
80
 * gets populated for a given engine once we receive an execbuffer. If later
80
 * gets populated for a given engine once we receive an execbuffer. If later
81
 * on we receive another execbuffer ioctl for the same context but a different
81
 * on we receive another execbuffer ioctl for the same context but a different
82
 * engine, we allocate/populate a new ringbuffer and context backing object and
82
 * engine, we allocate/populate a new ringbuffer and context backing object and
83
 * so on.
83
 * so on.
84
 *
84
 *
85
 * Finally, regarding local contexts created using the ioctl call: as they are
85
 * Finally, regarding local contexts created using the ioctl call: as they are
86
 * only allowed with the render ring, we can allocate & populate them right
86
 * only allowed with the render ring, we can allocate & populate them right
87
 * away (no need to defer anything, at least for now).
87
 * away (no need to defer anything, at least for now).
88
 *
88
 *
89
 * Execlists implementation:
89
 * Execlists implementation:
90
 * Execlists are the new method by which, on gen8+ hardware, workloads are
90
 * Execlists are the new method by which, on gen8+ hardware, workloads are
91
 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
91
 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92
 * This method works as follows:
92
 * This method works as follows:
93
 *
93
 *
94
 * When a request is committed, its commands (the BB start and any leading or
94
 * When a request is committed, its commands (the BB start and any leading or
95
 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
95
 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96
 * for the appropriate context. The tail pointer in the hardware context is not
96
 * for the appropriate context. The tail pointer in the hardware context is not
97
 * updated at this time, but instead, kept by the driver in the ringbuffer
97
 * updated at this time, but instead, kept by the driver in the ringbuffer
98
 * structure. A structure representing this request is added to a request queue
98
 * structure. A structure representing this request is added to a request queue
99
 * for the appropriate engine: this structure contains a copy of the context's
99
 * for the appropriate engine: this structure contains a copy of the context's
100
 * tail after the request was written to the ring buffer and a pointer to the
100
 * tail after the request was written to the ring buffer and a pointer to the
101
 * context itself.
101
 * context itself.
102
 *
102
 *
103
 * If the engine's request queue was empty before the request was added, the
103
 * If the engine's request queue was empty before the request was added, the
104
 * queue is processed immediately. Otherwise the queue will be processed during
104
 * queue is processed immediately. Otherwise the queue will be processed during
105
 * a context switch interrupt. In any case, elements on the queue will get sent
105
 * a context switch interrupt. In any case, elements on the queue will get sent
106
 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
106
 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107
 * globally unique 20-bit submission ID.
107
 * globally unique 20-bit submission ID.
108
 *
108
 *
109
 * When execution of a request completes, the GPU updates the context status
109
 * When execution of a request completes, the GPU updates the context status
110
 * buffer with a context complete event and generates a context switch interrupt.
110
 * buffer with a context complete event and generates a context switch interrupt.
111
 * During the interrupt handling, the driver examines the events in the buffer:
111
 * During the interrupt handling, the driver examines the events in the buffer:
112
 * for each context complete event, if the announced ID matches that on the head
112
 * for each context complete event, if the announced ID matches that on the head
113
 * of the request queue, then that request is retired and removed from the queue.
113
 * of the request queue, then that request is retired and removed from the queue.
114
 *
114
 *
115
 * After processing, if any requests were retired and the queue is not empty
115
 * After processing, if any requests were retired and the queue is not empty
116
 * then a new execution list can be submitted. The two requests at the front of
116
 * then a new execution list can be submitted. The two requests at the front of
117
 * the queue are next to be submitted but since a context may not occur twice in
117
 * the queue are next to be submitted but since a context may not occur twice in
118
 * an execution list, if subsequent requests have the same ID as the first then
118
 * an execution list, if subsequent requests have the same ID as the first then
119
 * the two requests must be combined. This is done simply by discarding requests
119
 * the two requests must be combined. This is done simply by discarding requests
120
 * at the head of the queue until either only one request is left (in which case
120
 * at the head of the queue until either only one request is left (in which case
121
 * we use a NULL second context) or the first two requests have unique IDs.
121
 * we use a NULL second context) or the first two requests have unique IDs.
122
 *
122
 *
123
 * By always executing the first two requests in the queue the driver ensures
123
 * By always executing the first two requests in the queue the driver ensures
124
 * that the GPU is kept as busy as possible. In the case where a single context
124
 * that the GPU is kept as busy as possible. In the case where a single context
125
 * completes but a second context is still executing, the request for this second
125
 * completes but a second context is still executing, the request for this second
126
 * context will be at the head of the queue when we remove the first one. This
126
 * context will be at the head of the queue when we remove the first one. This
127
 * request will then be resubmitted along with a new request for a different context,
127
 * request will then be resubmitted along with a new request for a different context,
128
 * which will cause the hardware to continue executing the second request and queue
128
 * which will cause the hardware to continue executing the second request and queue
129
 * the new request (the GPU detects the condition of a context getting preempted
129
 * the new request (the GPU detects the condition of a context getting preempted
130
 * with the same context and optimizes the context switch flow by not doing
130
 * with the same context and optimizes the context switch flow by not doing
131
 * preemption, but just sampling the new tail pointer).
131
 * preemption, but just sampling the new tail pointer).
132
 *
132
 *
133
 */
133
 */
134
 
134
 
135
#include 
135
#include 
136
#include 
136
#include 
137
#include "intel_drv.h"
137
#include "intel_drv.h"
138
#include "i915_drv.h"
138
#include "intel_mocs.h"
139
 
139
 
140
#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
140
#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
141
#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
141
#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
142
#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
142
#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
143
 
143
 
144
#define RING_EXECLIST_QFULL		(1 << 0x2)
144
#define RING_EXECLIST_QFULL		(1 << 0x2)
145
#define RING_EXECLIST1_VALID		(1 << 0x3)
145
#define RING_EXECLIST1_VALID		(1 << 0x3)
146
#define RING_EXECLIST0_VALID		(1 << 0x4)
146
#define RING_EXECLIST0_VALID		(1 << 0x4)
147
#define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
147
#define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
148
#define RING_EXECLIST1_ACTIVE		(1 << 0x11)
148
#define RING_EXECLIST1_ACTIVE		(1 << 0x11)
149
#define RING_EXECLIST0_ACTIVE		(1 << 0x12)
149
#define RING_EXECLIST0_ACTIVE		(1 << 0x12)
150
 
150
 
151
#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
151
#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
152
#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
152
#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
153
#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
153
#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
154
#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
154
#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
155
#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
155
#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
156
#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
156
#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
157
 
157
 
158
#define CTX_LRI_HEADER_0		0x01
158
#define CTX_LRI_HEADER_0		0x01
159
#define CTX_CONTEXT_CONTROL		0x02
159
#define CTX_CONTEXT_CONTROL		0x02
160
#define CTX_RING_HEAD			0x04
160
#define CTX_RING_HEAD			0x04
161
#define CTX_RING_TAIL			0x06
161
#define CTX_RING_TAIL			0x06
162
#define CTX_RING_BUFFER_START		0x08
162
#define CTX_RING_BUFFER_START		0x08
163
#define CTX_RING_BUFFER_CONTROL		0x0a
163
#define CTX_RING_BUFFER_CONTROL		0x0a
164
#define CTX_BB_HEAD_U			0x0c
164
#define CTX_BB_HEAD_U			0x0c
165
#define CTX_BB_HEAD_L			0x0e
165
#define CTX_BB_HEAD_L			0x0e
166
#define CTX_BB_STATE			0x10
166
#define CTX_BB_STATE			0x10
167
#define CTX_SECOND_BB_HEAD_U		0x12
167
#define CTX_SECOND_BB_HEAD_U		0x12
168
#define CTX_SECOND_BB_HEAD_L		0x14
168
#define CTX_SECOND_BB_HEAD_L		0x14
169
#define CTX_SECOND_BB_STATE		0x16
169
#define CTX_SECOND_BB_STATE		0x16
170
#define CTX_BB_PER_CTX_PTR		0x18
170
#define CTX_BB_PER_CTX_PTR		0x18
171
#define CTX_RCS_INDIRECT_CTX		0x1a
171
#define CTX_RCS_INDIRECT_CTX		0x1a
172
#define CTX_RCS_INDIRECT_CTX_OFFSET	0x1c
172
#define CTX_RCS_INDIRECT_CTX_OFFSET	0x1c
173
#define CTX_LRI_HEADER_1		0x21
173
#define CTX_LRI_HEADER_1		0x21
174
#define CTX_CTX_TIMESTAMP		0x22
174
#define CTX_CTX_TIMESTAMP		0x22
175
#define CTX_PDP3_UDW			0x24
175
#define CTX_PDP3_UDW			0x24
176
#define CTX_PDP3_LDW			0x26
176
#define CTX_PDP3_LDW			0x26
177
#define CTX_PDP2_UDW			0x28
177
#define CTX_PDP2_UDW			0x28
178
#define CTX_PDP2_LDW			0x2a
178
#define CTX_PDP2_LDW			0x2a
179
#define CTX_PDP1_UDW			0x2c
179
#define CTX_PDP1_UDW			0x2c
180
#define CTX_PDP1_LDW			0x2e
180
#define CTX_PDP1_LDW			0x2e
181
#define CTX_PDP0_UDW			0x30
181
#define CTX_PDP0_UDW			0x30
182
#define CTX_PDP0_LDW			0x32
182
#define CTX_PDP0_LDW			0x32
183
#define CTX_LRI_HEADER_2		0x41
183
#define CTX_LRI_HEADER_2		0x41
184
#define CTX_R_PWR_CLK_STATE		0x42
184
#define CTX_R_PWR_CLK_STATE		0x42
185
#define CTX_GPGPU_CSR_BASE_ADDRESS	0x44
185
#define CTX_GPGPU_CSR_BASE_ADDRESS	0x44
186
 
186
 
187
#define GEN8_CTX_VALID (1<<0)
187
#define GEN8_CTX_VALID (1<<0)
188
#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
188
#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
189
#define GEN8_CTX_FORCE_RESTORE (1<<2)
189
#define GEN8_CTX_FORCE_RESTORE (1<<2)
190
#define GEN8_CTX_L3LLC_COHERENT (1<<5)
190
#define GEN8_CTX_L3LLC_COHERENT (1<<5)
191
#define GEN8_CTX_PRIVILEGE (1<<8)
191
#define GEN8_CTX_PRIVILEGE (1<<8)
-
 
192
 
-
 
193
#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) { \
-
 
194
	const u64 _addr = i915_page_dir_dma_addr((ppgtt), (n));	\
-
 
195
	reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \
-
 
196
	reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
-
 
197
}
-
 
198
 
-
 
199
#define ASSIGN_CTX_PML4(ppgtt, reg_state) { \
-
 
200
	reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \
-
 
201
	reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
-
 
202
}
-
 
203
 
192
enum {
204
enum {
193
	ADVANCED_CONTEXT = 0,
205
	ADVANCED_CONTEXT = 0,
194
	LEGACY_CONTEXT,
206
	LEGACY_32B_CONTEXT,
195
	ADVANCED_AD_CONTEXT,
207
	ADVANCED_AD_CONTEXT,
196
	LEGACY_64B_CONTEXT
208
	LEGACY_64B_CONTEXT
197
};
209
};
198
#define GEN8_CTX_MODE_SHIFT 3
210
#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
-
 
211
#define GEN8_CTX_ADDRESSING_MODE(dev)  (USES_FULL_48BIT_PPGTT(dev) ?\
-
 
212
		LEGACY_64B_CONTEXT :\
-
 
213
		LEGACY_32B_CONTEXT)
199
enum {
214
enum {
200
	FAULT_AND_HANG = 0,
215
	FAULT_AND_HANG = 0,
201
	FAULT_AND_HALT, /* Debug only */
216
	FAULT_AND_HALT, /* Debug only */
202
	FAULT_AND_STREAM,
217
	FAULT_AND_STREAM,
203
	FAULT_AND_CONTINUE /* Unsupported */
218
	FAULT_AND_CONTINUE /* Unsupported */
204
};
219
};
205
#define GEN8_CTX_ID_SHIFT 32
220
#define GEN8_CTX_ID_SHIFT 32
-
 
221
#define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT  0x17
-
 
222
 
-
 
223
static int intel_lr_context_pin(struct drm_i915_gem_request *rq);
-
 
224
static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
-
 
225
		struct drm_i915_gem_object *default_ctx_obj);
206
 
-
 
207
static int intel_lr_context_pin(struct intel_engine_cs *ring,
-
 
208
		struct intel_context *ctx);
226
 
209
 
227
 
210
/**
228
/**
211
 * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
229
 * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
212
 * @dev: DRM device.
230
 * @dev: DRM device.
213
 * @enable_execlists: value of i915.enable_execlists module parameter.
231
 * @enable_execlists: value of i915.enable_execlists module parameter.
214
 *
232
 *
215
 * Only certain platforms support Execlists (the prerequisites being
233
 * Only certain platforms support Execlists (the prerequisites being
216
 * support for Logical Ring Contexts and Aliasing PPGTT or better),
234
 * support for Logical Ring Contexts and Aliasing PPGTT or better).
217
 * and only when enabled via module parameter.
-
 
218
 *
235
 *
219
 * Return: 1 if Execlists is supported and has to be enabled.
236
 * Return: 1 if Execlists is supported and has to be enabled.
220
 */
237
 */
221
int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists)
238
int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists)
222
{
239
{
223
	WARN_ON(i915.enable_ppgtt == -1);
240
	WARN_ON(i915.enable_ppgtt == -1);
-
 
241
 
-
 
242
	/* On platforms with execlist available, vGPU will only
-
 
243
	 * support execlist mode, no ring buffer mode.
-
 
244
	 */
-
 
245
	if (HAS_LOGICAL_RING_CONTEXTS(dev) && intel_vgpu_active(dev))
-
 
246
		return 1;
224
 
247
 
225
	if (INTEL_INFO(dev)->gen >= 9)
248
	if (INTEL_INFO(dev)->gen >= 9)
226
		return 1;
249
		return 1;
227
 
250
 
228
	if (enable_execlists == 0)
251
	if (enable_execlists == 0)
229
		return 0;
252
		return 0;
230
 
253
 
231
	if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev) &&
254
	if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev) &&
232
	    i915.use_mmio_flip >= 0)
255
	    i915.use_mmio_flip >= 0)
233
		return 1;
256
		return 1;
234
 
257
 
235
	return 0;
258
	return 0;
236
}
259
}
237
 
260
 
238
/**
261
/**
239
 * intel_execlists_ctx_id() - get the Execlists Context ID
262
 * intel_execlists_ctx_id() - get the Execlists Context ID
240
 * @ctx_obj: Logical Ring Context backing object.
263
 * @ctx_obj: Logical Ring Context backing object.
241
 *
264
 *
242
 * Do not confuse with ctx->id! Unfortunately we have a name overload
265
 * Do not confuse with ctx->id! Unfortunately we have a name overload
243
 * here: the old context ID we pass to userspace as a handle so that
266
 * here: the old context ID we pass to userspace as a handle so that
244
 * they can refer to a context, and the new context ID we pass to the
267
 * they can refer to a context, and the new context ID we pass to the
245
 * ELSP so that the GPU can inform us of the context status via
268
 * ELSP so that the GPU can inform us of the context status via
246
 * interrupts.
269
 * interrupts.
247
 *
270
 *
248
 * Return: 20-bit globally unique context ID.
271
 * Return: 20-bit globally unique context ID.
249
 */
272
 */
250
u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
273
u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
251
{
274
{
252
	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj);
275
	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
-
 
276
			LRC_PPHWSP_PN * PAGE_SIZE;
253
 
277
 
254
	/* LRCA is required to be 4K aligned so the most significant 20 bits
278
	/* LRCA is required to be 4K aligned so the most significant 20 bits
255
	 * are globally unique */
279
	 * are globally unique */
256
	return lrca >> 12;
280
	return lrca >> 12;
257
}
281
}
258
 
282
 
259
static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
283
static bool disable_lite_restore_wa(struct intel_engine_cs *ring)
-
 
284
{
-
 
285
	struct drm_device *dev = ring->dev;
-
 
286
 
-
 
287
	return ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
-
 
288
		(IS_BROXTON(dev) && INTEL_REVID(dev) == BXT_REVID_A0)) &&
-
 
289
	       (ring->id == VCS || ring->id == VCS2);
-
 
290
}
-
 
291
 
-
 
292
uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
-
 
293
				     struct intel_engine_cs *ring)
-
 
294
{
260
{
295
	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
261
	uint64_t desc;
296
	uint64_t desc;
-
 
297
	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
262
	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
298
			LRC_PPHWSP_PN * PAGE_SIZE;
263
 
299
 
264
	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
300
	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
265
 
301
 
266
	desc = GEN8_CTX_VALID;
302
	desc = GEN8_CTX_VALID;
267
	desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
303
	desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
-
 
304
	if (IS_GEN8(ctx_obj->base.dev))
268
	desc |= GEN8_CTX_L3LLC_COHERENT;
305
		desc |= GEN8_CTX_L3LLC_COHERENT;
269
	desc |= GEN8_CTX_PRIVILEGE;
306
	desc |= GEN8_CTX_PRIVILEGE;
270
	desc |= lrca;
307
	desc |= lrca;
271
	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
308
	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
272
 
309
 
273
	/* TODO: WaDisableLiteRestore when we start using semaphore
310
	/* TODO: WaDisableLiteRestore when we start using semaphore
274
	 * signalling between Command Streamers */
311
	 * signalling between Command Streamers */
275
	/* desc |= GEN8_CTX_FORCE_RESTORE; */
312
	/* desc |= GEN8_CTX_FORCE_RESTORE; */
-
 
313
 
-
 
314
	/* WaEnableForceRestoreInCtxtDescForVCS:skl */
-
 
315
	/* WaEnableForceRestoreInCtxtDescForVCS:bxt */
-
 
316
	if (disable_lite_restore_wa(ring))
-
 
317
		desc |= GEN8_CTX_FORCE_RESTORE;
276
 
318
 
277
	return desc;
319
	return desc;
278
}
320
}
279
 
321
 
280
static void execlists_elsp_write(struct intel_engine_cs *ring,
-
 
281
				 struct drm_i915_gem_object *ctx_obj0,
322
static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
282
				 struct drm_i915_gem_object *ctx_obj1)
323
				 struct drm_i915_gem_request *rq1)
-
 
324
{
-
 
325
 
283
{
326
	struct intel_engine_cs *ring = rq0->ring;
284
	struct drm_device *dev = ring->dev;
327
	struct drm_device *dev = ring->dev;
285
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
286
	uint64_t temp = 0;
328
	struct drm_i915_private *dev_priv = dev->dev_private;
287
	uint32_t desc[4];
-
 
288
	unsigned long flags;
-
 
289
 
329
	uint64_t desc[2];
290
	/* XXX: You must always write both descriptors in the order below. */
330
 
291
	if (ctx_obj1)
-
 
292
		temp = execlists_ctx_descriptor(ctx_obj1);
-
 
293
	else
-
 
294
		temp = 0;
-
 
295
	desc[1] = (u32)(temp >> 32);
-
 
296
	desc[0] = (u32)temp;
-
 
297
 
-
 
298
	temp = execlists_ctx_descriptor(ctx_obj0);
331
	if (rq1) {
299
	desc[3] = (u32)(temp >> 32);
-
 
300
	desc[2] = (u32)temp;
-
 
301
 
-
 
302
	/* Set Force Wakeup bit to prevent GT from entering C6 while ELSP writes
-
 
303
	 * are in progress.
-
 
304
	 *
-
 
305
	 * The other problem is that we can't just call gen6_gt_force_wake_get()
-
 
306
	 * because that function calls intel_runtime_pm_get(), which might sleep.
-
 
307
	 * Instead, we do the runtime_pm_get/put when creating/destroying requests.
-
 
308
	 */
-
 
309
	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
-
 
310
	if (IS_CHERRYVIEW(dev) || INTEL_INFO(dev)->gen >= 9) {
-
 
311
		if (dev_priv->uncore.fw_rendercount++ == 0)
-
 
312
			dev_priv->uncore.funcs.force_wake_get(dev_priv,
-
 
313
							      FORCEWAKE_RENDER);
-
 
314
		if (dev_priv->uncore.fw_mediacount++ == 0)
-
 
315
			dev_priv->uncore.funcs.force_wake_get(dev_priv,
-
 
316
							      FORCEWAKE_MEDIA);
-
 
317
		if (INTEL_INFO(dev)->gen >= 9) {
-
 
318
			if (dev_priv->uncore.fw_blittercount++ == 0)
-
 
319
				dev_priv->uncore.funcs.force_wake_get(dev_priv,
-
 
320
							FORCEWAKE_BLITTER);
332
		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->ring);
321
		}
-
 
322
	} else {
-
 
323
		if (dev_priv->uncore.forcewake_count++ == 0)
333
		rq1->elsp_submitted++;
324
			dev_priv->uncore.funcs.force_wake_get(dev_priv,
334
	} else {
325
							      FORCEWAKE_ALL);
-
 
326
	}
-
 
327
	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
-
 
328
 
-
 
329
	I915_WRITE(RING_ELSP(ring), desc[1]);
-
 
330
	I915_WRITE(RING_ELSP(ring), desc[0]);
-
 
331
	I915_WRITE(RING_ELSP(ring), desc[3]);
-
 
332
	/* The context is automatically loaded after the following */
335
		desc[1] = 0;
333
	I915_WRITE(RING_ELSP(ring), desc[2]);
336
	}
334
 
337
 
335
	/* ELSP is a wo register, so use another nearby reg for posting instead */
338
	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->ring);
336
	POSTING_READ(RING_EXECLIST_STATUS(ring));
-
 
337
 
-
 
338
	/* Release Force Wakeup (see the big comment above). */
-
 
339
	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
-
 
340
	if (IS_CHERRYVIEW(dev) || INTEL_INFO(dev)->gen >= 9) {
-
 
341
		if (--dev_priv->uncore.fw_rendercount == 0)
339
	rq0->elsp_submitted++;
342
			dev_priv->uncore.funcs.force_wake_put(dev_priv,
-
 
343
							      FORCEWAKE_RENDER);
-
 
344
		if (--dev_priv->uncore.fw_mediacount == 0)
-
 
345
			dev_priv->uncore.funcs.force_wake_put(dev_priv,
340
 
346
							      FORCEWAKE_MEDIA);
-
 
347
		if (INTEL_INFO(dev)->gen >= 9) {
-
 
348
			if (--dev_priv->uncore.fw_blittercount == 0)
-
 
349
				dev_priv->uncore.funcs.force_wake_put(dev_priv,
-
 
350
							FORCEWAKE_BLITTER);
341
	/* You must always write both descriptors in the order below. */
351
		}
-
 
352
	} else {
-
 
-
 
342
	spin_lock(&dev_priv->uncore.lock);
-
 
343
	intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
-
 
344
	I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[1]));
-
 
345
	I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[1]));
-
 
346
 
-
 
347
	I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[0]));
-
 
348
	/* The context is automatically loaded after the following */
353
		if (--dev_priv->uncore.forcewake_count == 0)
349
	I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[0]));
354
			dev_priv->uncore.funcs.force_wake_put(dev_priv,
350
 
355
							      FORCEWAKE_ALL);
351
	/* ELSP is a wo register, use another nearby reg for posting */
356
	}
-
 
357
 
-
 
358
	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
352
	POSTING_READ_FW(RING_EXECLIST_STATUS_LO(ring));
-
 
353
	intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
-
 
354
	spin_unlock(&dev_priv->uncore.lock);
-
 
355
}
-
 
356
 
359
}
357
static int execlists_update_context(struct drm_i915_gem_request *rq)
360
 
358
{
361
static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
359
	struct intel_engine_cs *ring = rq->ring;
362
				    struct drm_i915_gem_object *ring_obj,
360
	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
363
				    u32 tail)
361
	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
364
{
362
	struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj;
365
	struct page *page;
363
	struct page *page;
366
	uint32_t *reg_state;
364
	uint32_t *reg_state;
-
 
365
 
-
 
366
	BUG_ON(!ctx_obj);
-
 
367
	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
-
 
368
	WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
367
 
369
 
368
	page = i915_gem_object_get_page(ctx_obj, 1);
370
	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
369
	reg_state = kmap_atomic(page);
371
	reg_state = kmap_atomic(page);
370
 
372
 
-
 
373
	reg_state[CTX_RING_TAIL+1] = rq->tail;
-
 
374
	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
-
 
375
 
-
 
376
	if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
-
 
377
		/* True 32b PPGTT with dynamic page allocation: update PDP
-
 
378
		 * registers and point the unallocated PDPs to scratch page.
-
 
379
		 * PML4 is allocated during ppgtt init, so this is not needed
-
 
380
		 * in 48-bit mode.
-
 
381
		 */
-
 
382
		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
-
 
383
		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
-
 
384
		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
371
	reg_state[CTX_RING_TAIL+1] = tail;
385
		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
372
	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
386
	}
373
 
387
 
374
	kunmap_atomic(reg_state);
388
	kunmap_atomic(reg_state);
375
 
389
 
376
	return 0;
390
	return 0;
377
}
391
}
378
 
392
 
379
static void execlists_submit_contexts(struct intel_engine_cs *ring,
-
 
380
				      struct intel_context *to0, u32 tail0,
393
static void execlists_submit_requests(struct drm_i915_gem_request *rq0,
381
				      struct intel_context *to1, u32 tail1)
394
				      struct drm_i915_gem_request *rq1)
382
{
-
 
383
	struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state;
-
 
384
	struct intel_ringbuffer *ringbuf0 = to0->engine[ring->id].ringbuf;
-
 
385
	struct drm_i915_gem_object *ctx_obj1 = NULL;
-
 
386
	struct intel_ringbuffer *ringbuf1 = NULL;
-
 
387
 
-
 
388
	BUG_ON(!ctx_obj0);
-
 
389
	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
-
 
390
	WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj));
-
 
391
 
395
{
392
	execlists_update_context(ctx_obj0, ringbuf0->obj, tail0);
396
	execlists_update_context(rq0);
393
 
-
 
394
	if (to1) {
-
 
395
		ringbuf1 = to1->engine[ring->id].ringbuf;
397
 
396
		ctx_obj1 = to1->engine[ring->id].state;
-
 
397
		BUG_ON(!ctx_obj1);
-
 
398
		WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
-
 
399
		WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj));
-
 
400
 
-
 
401
		execlists_update_context(ctx_obj1, ringbuf1->obj, tail1);
398
	if (rq1)
402
	}
399
		execlists_update_context(rq1);
403
 
400
 
404
	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
401
	execlists_elsp_write(rq0, rq1);
405
}
402
}
406
 
403
 
407
static void execlists_context_unqueue(struct intel_engine_cs *ring)
404
static void execlists_context_unqueue(struct intel_engine_cs *ring)
408
{
405
{
409
	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
406
	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
410
	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
407
	struct drm_i915_gem_request *cursor = NULL, *tmp = NULL;
411
 
408
 
412
	assert_spin_locked(&ring->execlist_lock);
409
	assert_spin_locked(&ring->execlist_lock);
-
 
410
 
-
 
411
	/*
-
 
412
	 * If irqs are not active generate a warning as batches that finish
-
 
413
	 * without the irqs may get lost and a GPU Hang may occur.
-
 
414
	 */
-
 
415
	WARN_ON(!intel_irqs_enabled(ring->dev->dev_private));
413
 
416
 
414
	if (list_empty(&ring->execlist_queue))
417
	if (list_empty(&ring->execlist_queue))
415
		return;
418
		return;
416
 
419
 
417
	/* Try to read in pairs */
420
	/* Try to read in pairs */
418
	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
421
	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
419
				 execlist_link) {
422
				 execlist_link) {
420
		if (!req0) {
423
		if (!req0) {
421
			req0 = cursor;
424
			req0 = cursor;
422
		} else if (req0->ctx == cursor->ctx) {
425
		} else if (req0->ctx == cursor->ctx) {
423
			/* Same ctx: ignore first request, as second request
426
			/* Same ctx: ignore first request, as second request
424
			 * will update tail past first request's workload */
427
			 * will update tail past first request's workload */
425
			cursor->elsp_submitted = req0->elsp_submitted;
428
			cursor->elsp_submitted = req0->elsp_submitted;
426
			list_del(&req0->execlist_link);
429
			list_del(&req0->execlist_link);
427
			list_add_tail(&req0->execlist_link,
430
			list_add_tail(&req0->execlist_link,
428
				&ring->execlist_retired_req_list);
431
				&ring->execlist_retired_req_list);
429
			req0 = cursor;
432
			req0 = cursor;
430
		} else {
433
		} else {
431
			req1 = cursor;
434
			req1 = cursor;
432
			break;
435
			break;
433
		}
436
		}
434
	}
437
	}
-
 
438
 
-
 
439
	if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) {
-
 
440
		/*
-
 
441
		 * WaIdleLiteRestore: make sure we never cause a lite
-
 
442
		 * restore with HEAD==TAIL
-
 
443
		 */
-
 
444
		if (req0->elsp_submitted) {
-
 
445
			/*
-
 
446
			 * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL
-
 
447
			 * as we resubmit the request. See gen8_emit_request()
-
 
448
			 * for where we prepare the padding after the end of the
-
 
449
			 * request.
-
 
450
			 */
-
 
451
			struct intel_ringbuffer *ringbuf;
-
 
452
 
-
 
453
			ringbuf = req0->ctx->engine[ring->id].ringbuf;
-
 
454
			req0->tail += 8;
-
 
455
			req0->tail &= ringbuf->size - 1;
-
 
456
		}
-
 
457
	}
435
 
458
 
436
	WARN_ON(req1 && req1->elsp_submitted);
459
	WARN_ON(req1 && req1->elsp_submitted);
437
 
-
 
438
	execlists_submit_contexts(ring, req0->ctx, req0->tail,
-
 
439
				  req1 ? req1->ctx : NULL,
-
 
440
				  req1 ? req1->tail : 0);
-
 
441
 
-
 
442
	req0->elsp_submitted++;
-
 
443
	if (req1)
460
 
444
		req1->elsp_submitted++;
461
	execlists_submit_requests(req0, req1);
445
}
462
}
446
 
463
 
447
static bool execlists_check_remove_request(struct intel_engine_cs *ring,
464
static bool execlists_check_remove_request(struct intel_engine_cs *ring,
448
					   u32 request_id)
465
					   u32 request_id)
449
{
466
{
450
	struct intel_ctx_submit_request *head_req;
467
	struct drm_i915_gem_request *head_req;
451
 
468
 
452
	assert_spin_locked(&ring->execlist_lock);
469
	assert_spin_locked(&ring->execlist_lock);
453
 
470
 
454
	head_req = list_first_entry_or_null(&ring->execlist_queue,
471
	head_req = list_first_entry_or_null(&ring->execlist_queue,
455
					    struct intel_ctx_submit_request,
472
					    struct drm_i915_gem_request,
456
					    execlist_link);
473
					    execlist_link);
457
 
474
 
458
	if (head_req != NULL) {
475
	if (head_req != NULL) {
459
		struct drm_i915_gem_object *ctx_obj =
476
		struct drm_i915_gem_object *ctx_obj =
460
				head_req->ctx->engine[ring->id].state;
477
				head_req->ctx->engine[ring->id].state;
461
		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
478
		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
462
			WARN(head_req->elsp_submitted == 0,
479
			WARN(head_req->elsp_submitted == 0,
463
			     "Never submitted head request\n");
480
			     "Never submitted head request\n");
464
 
481
 
465
			if (--head_req->elsp_submitted <= 0) {
482
			if (--head_req->elsp_submitted <= 0) {
466
				list_del(&head_req->execlist_link);
483
				list_del(&head_req->execlist_link);
467
				list_add_tail(&head_req->execlist_link,
484
				list_add_tail(&head_req->execlist_link,
468
					&ring->execlist_retired_req_list);
485
					&ring->execlist_retired_req_list);
469
				return true;
486
				return true;
470
			}
487
			}
471
		}
488
		}
472
	}
489
	}
473
 
490
 
474
	return false;
491
	return false;
475
}
492
}
476
 
493
 
477
/**
494
/**
478
 * intel_execlists_handle_ctx_events() - handle Context Switch interrupts
495
 * intel_lrc_irq_handler() - handle Context Switch interrupts
479
 * @ring: Engine Command Streamer to handle.
496
 * @ring: Engine Command Streamer to handle.
480
 *
497
 *
481
 * Check the unread Context Status Buffers and manage the submission of new
498
 * Check the unread Context Status Buffers and manage the submission of new
482
 * contexts to the ELSP accordingly.
499
 * contexts to the ELSP accordingly.
483
 */
500
 */
484
void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring)
501
void intel_lrc_irq_handler(struct intel_engine_cs *ring)
485
{
502
{
486
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
503
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
487
	u32 status_pointer;
504
	u32 status_pointer;
488
	u8 read_pointer;
505
	u8 read_pointer;
489
	u8 write_pointer;
506
	u8 write_pointer;
490
	u32 status;
507
	u32 status = 0;
491
	u32 status_id;
508
	u32 status_id;
492
	u32 submit_contexts = 0;
509
	u32 submit_contexts = 0;
493
 
510
 
494
	status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
511
	status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
495
 
512
 
496
	read_pointer = ring->next_context_status_buffer;
513
	read_pointer = ring->next_context_status_buffer;
497
	write_pointer = status_pointer & 0x07;
514
	write_pointer = status_pointer & GEN8_CSB_PTR_MASK;
498
	if (read_pointer > write_pointer)
515
	if (read_pointer > write_pointer)
499
		write_pointer += 6;
516
		write_pointer += GEN8_CSB_ENTRIES;
500
 
517
 
501
	spin_lock(&ring->execlist_lock);
518
	spin_lock(&ring->execlist_lock);
502
 
519
 
503
	while (read_pointer < write_pointer) {
520
	while (read_pointer < write_pointer) {
504
		read_pointer++;
521
		read_pointer++;
505
		status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
522
		status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer % GEN8_CSB_ENTRIES));
506
				(read_pointer % 6) * 8);
523
		status_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer % GEN8_CSB_ENTRIES));
-
 
524
 
507
		status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
525
		if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
508
				(read_pointer % 6) * 8 + 4);
526
			continue;
509
 
527
 
510
		if (status & GEN8_CTX_STATUS_PREEMPTED) {
528
		if (status & GEN8_CTX_STATUS_PREEMPTED) {
511
			if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
529
			if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
512
				if (execlists_check_remove_request(ring, status_id))
530
				if (execlists_check_remove_request(ring, status_id))
513
					WARN(1, "Lite Restored request removed from queue\n");
531
					WARN(1, "Lite Restored request removed from queue\n");
514
			} else
532
			} else
515
				WARN(1, "Preemption without Lite Restore\n");
533
				WARN(1, "Preemption without Lite Restore\n");
516
		}
534
		}
517
 
535
 
518
		 if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
536
		 if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
519
		     (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
537
		     (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
520
			if (execlists_check_remove_request(ring, status_id))
538
			if (execlists_check_remove_request(ring, status_id))
521
				submit_contexts++;
539
				submit_contexts++;
522
		}
540
		}
523
	}
541
	}
-
 
542
 
-
 
543
	if (disable_lite_restore_wa(ring)) {
-
 
544
		/* Prevent a ctx to preempt itself */
524
 
545
		if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) &&
525
	if (submit_contexts != 0)
546
		    (submit_contexts != 0))
-
 
547
			execlists_context_unqueue(ring);
-
 
548
	} else if (submit_contexts != 0) {
-
 
549
		execlists_context_unqueue(ring);
526
		execlists_context_unqueue(ring);
550
	}
527
 
551
 
528
	spin_unlock(&ring->execlist_lock);
552
	spin_unlock(&ring->execlist_lock);
529
 
553
 
530
	WARN(submit_contexts > 2, "More than two context complete events?\n");
554
	WARN(submit_contexts > 2, "More than two context complete events?\n");
531
	ring->next_context_status_buffer = write_pointer % 6;
555
	ring->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
532
 
556
 
-
 
557
	I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
533
	I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
558
		   _MASKED_FIELD(GEN8_CSB_PTR_MASK << 8,
-
 
559
				 ((u32)ring->next_context_status_buffer &
534
		   ((u32)ring->next_context_status_buffer & 0x07) << 8);
560
				  GEN8_CSB_PTR_MASK) << 8));
535
}
561
}
536
 
-
 
537
static int execlists_context_queue(struct intel_engine_cs *ring,
-
 
538
				   struct intel_context *to,
562
 
539
				   u32 tail)
563
static int execlists_context_queue(struct drm_i915_gem_request *request)
540
{
564
{
541
	struct intel_ctx_submit_request *req = NULL, *cursor;
-
 
542
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
565
	struct intel_engine_cs *ring = request->ring;
543
	unsigned long flags;
-
 
544
	int num_elements = 0;
-
 
545
 
-
 
546
	req = kzalloc(sizeof(*req), GFP_KERNEL);
-
 
547
	if (req == NULL)
-
 
548
		return -ENOMEM;
-
 
549
	req->ctx = to;
566
	struct drm_i915_gem_request *cursor;
550
	i915_gem_context_reference(req->ctx);
567
	int num_elements = 0;
551
 
-
 
552
	if (to != ring->default_context)
-
 
553
		intel_lr_context_pin(ring, to);
-
 
554
 
568
 
555
	req->ring = ring;
569
	if (request->ctx != ring->default_context)
556
	req->tail = tail;
570
		intel_lr_context_pin(request);
557
 
571
 
558
	intel_runtime_pm_get(dev_priv);
572
	i915_gem_request_reference(request);
559
 
573
 
560
	spin_lock_irqsave(&ring->execlist_lock, flags);
574
	spin_lock_irq(&ring->execlist_lock);
561
 
575
 
562
	list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
576
	list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
563
		if (++num_elements > 2)
577
		if (++num_elements > 2)
564
			break;
578
			break;
565
 
579
 
566
	if (num_elements > 2) {
580
	if (num_elements > 2) {
567
		struct intel_ctx_submit_request *tail_req;
581
		struct drm_i915_gem_request *tail_req;
568
 
582
 
569
		tail_req = list_last_entry(&ring->execlist_queue,
583
		tail_req = list_last_entry(&ring->execlist_queue,
570
					   struct intel_ctx_submit_request,
584
					   struct drm_i915_gem_request,
571
					   execlist_link);
585
					   execlist_link);
572
 
586
 
573
		if (to == tail_req->ctx) {
587
		if (request->ctx == tail_req->ctx) {
574
			WARN(tail_req->elsp_submitted != 0,
588
			WARN(tail_req->elsp_submitted != 0,
575
				"More than 2 already-submitted reqs queued\n");
589
				"More than 2 already-submitted reqs queued\n");
576
			list_del(&tail_req->execlist_link);
590
			list_del(&tail_req->execlist_link);
577
			list_add_tail(&tail_req->execlist_link,
591
			list_add_tail(&tail_req->execlist_link,
578
				&ring->execlist_retired_req_list);
592
				&ring->execlist_retired_req_list);
579
		}
593
		}
580
	}
594
	}
581
 
595
 
582
	list_add_tail(&req->execlist_link, &ring->execlist_queue);
596
	list_add_tail(&request->execlist_link, &ring->execlist_queue);
583
	if (num_elements == 0)
597
	if (num_elements == 0)
584
		execlists_context_unqueue(ring);
598
		execlists_context_unqueue(ring);
585
 
599
 
586
	spin_unlock_irqrestore(&ring->execlist_lock, flags);
600
	spin_unlock_irq(&ring->execlist_lock);
587
 
601
 
588
	return 0;
602
	return 0;
589
}
603
}
590
 
604
 
591
static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
605
static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
592
{
606
{
593
	struct intel_engine_cs *ring = ringbuf->ring;
607
	struct intel_engine_cs *ring = req->ring;
594
	uint32_t flush_domains;
608
	uint32_t flush_domains;
595
	int ret;
609
	int ret;
596
 
610
 
597
	flush_domains = 0;
611
	flush_domains = 0;
598
	if (ring->gpu_caches_dirty)
612
	if (ring->gpu_caches_dirty)
599
		flush_domains = I915_GEM_GPU_DOMAINS;
613
		flush_domains = I915_GEM_GPU_DOMAINS;
600
 
614
 
601
	ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains);
615
	ret = ring->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
602
	if (ret)
616
	if (ret)
603
		return ret;
617
		return ret;
604
 
618
 
605
	ring->gpu_caches_dirty = false;
619
	ring->gpu_caches_dirty = false;
606
	return 0;
620
	return 0;
607
}
621
}
608
 
622
 
609
static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
623
static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
610
				 struct list_head *vmas)
624
				 struct list_head *vmas)
611
{
625
{
612
	struct intel_engine_cs *ring = ringbuf->ring;
626
	const unsigned other_rings = ~intel_ring_flag(req->ring);
613
	struct i915_vma *vma;
627
	struct i915_vma *vma;
614
	uint32_t flush_domains = 0;
628
	uint32_t flush_domains = 0;
615
	bool flush_chipset = false;
629
	bool flush_chipset = false;
616
	int ret;
630
	int ret;
617
 
631
 
618
	list_for_each_entry(vma, vmas, exec_list) {
632
	list_for_each_entry(vma, vmas, exec_list) {
619
		struct drm_i915_gem_object *obj = vma->obj;
633
		struct drm_i915_gem_object *obj = vma->obj;
-
 
634
 
620
 
635
		if (obj->active & other_rings) {
621
		ret = i915_gem_object_sync(obj, ring);
636
			ret = i915_gem_object_sync(obj, req->ring, &req);
622
		if (ret)
637
			if (ret)
-
 
638
				return ret;
623
			return ret;
639
		}
624
 
640
 
625
		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
641
		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
626
			flush_chipset |= i915_gem_clflush_object(obj, false);
642
			flush_chipset |= i915_gem_clflush_object(obj, false);
627
 
643
 
628
		flush_domains |= obj->base.write_domain;
644
		flush_domains |= obj->base.write_domain;
629
	}
645
	}
630
 
646
 
631
	if (flush_domains & I915_GEM_DOMAIN_GTT)
647
	if (flush_domains & I915_GEM_DOMAIN_GTT)
632
		wmb();
648
		wmb();
633
 
649
 
634
	/* Unconditionally invalidate gpu caches and ensure that we do flush
650
	/* Unconditionally invalidate gpu caches and ensure that we do flush
635
	 * any residual writes from the previous batch.
651
	 * any residual writes from the previous batch.
636
	 */
652
	 */
637
	return logical_ring_invalidate_all_caches(ringbuf);
653
	return logical_ring_invalidate_all_caches(req);
-
 
654
}
-
 
655
 
-
 
656
int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
-
 
657
{
-
 
658
	int ret;
-
 
659
 
-
 
660
	request->ringbuf = request->ctx->engine[request->ring->id].ringbuf;
-
 
661
 
-
 
662
	if (request->ctx != request->ring->default_context) {
-
 
663
		ret = intel_lr_context_pin(request);
-
 
664
		if (ret)
-
 
665
			return ret;
-
 
666
	}
-
 
667
 
-
 
668
	return 0;
-
 
669
}
-
 
670
 
-
 
671
static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
-
 
672
				       int bytes)
-
 
673
{
-
 
674
	struct intel_ringbuffer *ringbuf = req->ringbuf;
-
 
675
	struct intel_engine_cs *ring = req->ring;
-
 
676
	struct drm_i915_gem_request *target;
-
 
677
	unsigned space;
-
 
678
	int ret;
-
 
679
 
-
 
680
	if (intel_ring_space(ringbuf) >= bytes)
-
 
681
		return 0;
-
 
682
 
-
 
683
	/* The whole point of reserving space is to not wait! */
-
 
684
	WARN_ON(ringbuf->reserved_in_use);
-
 
685
 
-
 
686
	list_for_each_entry(target, &ring->request_list, list) {
-
 
687
		/*
-
 
688
		 * The request queue is per-engine, so can contain requests
-
 
689
		 * from multiple ringbuffers. Here, we must ignore any that
-
 
690
		 * aren't from the ringbuffer we're considering.
-
 
691
		 */
-
 
692
		if (target->ringbuf != ringbuf)
-
 
693
			continue;
-
 
694
 
-
 
695
		/* Would completion of this request free enough space? */
-
 
696
		space = __intel_ring_space(target->postfix, ringbuf->tail,
-
 
697
					   ringbuf->size);
-
 
698
		if (space >= bytes)
-
 
699
			break;
-
 
700
	}
-
 
701
 
-
 
702
	if (WARN_ON(&target->list == &ring->request_list))
-
 
703
		return -ENOSPC;
-
 
704
 
-
 
705
	ret = i915_wait_request(target);
-
 
706
	if (ret)
-
 
707
		return ret;
-
 
708
 
-
 
709
	ringbuf->space = space;
-
 
710
	return 0;
-
 
711
}
-
 
712
 
-
 
713
/*
-
 
714
 * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
-
 
715
 * @request: Request to advance the logical ringbuffer of.
-
 
716
 *
-
 
717
 * The tail is updated in our logical ringbuffer struct, not in the actual context. What
-
 
718
 * really happens during submission is that the context and current tail will be placed
-
 
719
 * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
-
 
720
 * point, the tail *inside* the context is updated and the ELSP written to.
-
 
721
 */
-
 
722
static void
-
 
723
intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
-
 
724
{
-
 
725
	struct intel_engine_cs *ring = request->ring;
-
 
726
	struct drm_i915_private *dev_priv = request->i915;
-
 
727
 
-
 
728
	intel_logical_ring_advance(request->ringbuf);
-
 
729
 
-
 
730
	request->tail = request->ringbuf->tail;
-
 
731
 
-
 
732
	if (intel_ring_stopped(ring))
-
 
733
		return;
-
 
734
 
-
 
735
	if (dev_priv->guc.execbuf_client)
-
 
736
		i915_guc_submit(dev_priv->guc.execbuf_client, request);
-
 
737
	else
-
 
738
		execlists_context_queue(request);
-
 
739
}
-
 
740
 
-
 
741
static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
-
 
742
{
-
 
743
	uint32_t __iomem *virt;
-
 
744
	int rem = ringbuf->size - ringbuf->tail;
-
 
745
 
-
 
746
	virt = ringbuf->virtual_start + ringbuf->tail;
-
 
747
	rem /= 4;
-
 
748
	while (rem--)
-
 
749
		iowrite32(MI_NOOP, virt++);
-
 
750
 
-
 
751
	ringbuf->tail = 0;
-
 
752
	intel_ring_update_space(ringbuf);
-
 
753
}
-
 
754
 
-
 
755
static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
-
 
756
{
-
 
757
	struct intel_ringbuffer *ringbuf = req->ringbuf;
-
 
758
	int remain_usable = ringbuf->effective_size - ringbuf->tail;
-
 
759
	int remain_actual = ringbuf->size - ringbuf->tail;
-
 
760
	int ret, total_bytes, wait_bytes = 0;
-
 
761
	bool need_wrap = false;
-
 
762
 
-
 
763
	if (ringbuf->reserved_in_use)
-
 
764
		total_bytes = bytes;
-
 
765
	else
-
 
766
		total_bytes = bytes + ringbuf->reserved_size;
-
 
767
 
-
 
768
	if (unlikely(bytes > remain_usable)) {
-
 
769
		/*
-
 
770
		 * Not enough space for the basic request. So need to flush
-
 
771
		 * out the remainder and then wait for base + reserved.
-
 
772
		 */
-
 
773
		wait_bytes = remain_actual + total_bytes;
-
 
774
		need_wrap = true;
-
 
775
	} else {
-
 
776
		if (unlikely(total_bytes > remain_usable)) {
-
 
777
			/*
-
 
778
			 * The base request will fit but the reserved space
-
 
779
			 * falls off the end. So only need to to wait for the
-
 
780
			 * reserved size after flushing out the remainder.
-
 
781
			 */
-
 
782
			wait_bytes = remain_actual + ringbuf->reserved_size;
-
 
783
			need_wrap = true;
-
 
784
		} else if (total_bytes > ringbuf->space) {
-
 
785
			/* No wrapping required, just waiting. */
-
 
786
			wait_bytes = total_bytes;
-
 
787
		}
-
 
788
	}
-
 
789
 
-
 
790
	if (wait_bytes) {
-
 
791
		ret = logical_ring_wait_for_space(req, wait_bytes);
-
 
792
		if (unlikely(ret))
-
 
793
			return ret;
-
 
794
 
-
 
795
		if (need_wrap)
-
 
796
			__wrap_ring_buffer(ringbuf);
-
 
797
	}
-
 
798
 
-
 
799
	return 0;
-
 
800
}
-
 
801
 
-
 
802
/**
-
 
803
 * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
-
 
804
 *
-
 
805
 * @req: The request to start some new work for
-
 
806
 * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
-
 
807
 *
-
 
808
 * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
-
 
809
 * be wrapped, or wait a bit for the tail to be updated). This function takes care of that
-
 
810
 * and also preallocates a request (every workload submission is still mediated through
-
 
811
 * requests, same as it did with legacy ringbuffer submission).
-
 
812
 *
-
 
813
 * Return: non-zero if the ringbuffer is not ready to be written to.
-
 
814
 */
-
 
815
int intel_logical_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
-
 
816
{
-
 
817
	struct drm_i915_private *dev_priv;
-
 
818
	int ret;
-
 
819
 
-
 
820
	WARN_ON(req == NULL);
-
 
821
	dev_priv = req->ring->dev->dev_private;
-
 
822
 
-
 
823
	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-
 
824
				   dev_priv->mm.interruptible);
-
 
825
	if (ret)
-
 
826
		return ret;
-
 
827
 
-
 
828
	ret = logical_ring_prepare(req, num_dwords * sizeof(uint32_t));
-
 
829
	if (ret)
-
 
830
		return ret;
-
 
831
 
-
 
832
	req->ringbuf->space -= num_dwords * sizeof(uint32_t);
-
 
833
	return 0;
-
 
834
}
-
 
835
 
-
 
836
int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request)
-
 
837
{
-
 
838
	/*
-
 
839
	 * The first call merely notes the reserve request and is common for
-
 
840
	 * all back ends. The subsequent localised _begin() call actually
-
 
841
	 * ensures that the reservation is available. Without the begin, if
-
 
842
	 * the request creator immediately submitted the request without
-
 
843
	 * adding any commands to it then there might not actually be
-
 
844
	 * sufficient room for the submission commands.
-
 
845
	 */
-
 
846
	intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
-
 
847
 
-
 
848
	return intel_logical_ring_begin(request, 0);
638
}
849
}
639
 
850
 
640
/**
851
/**
641
 * execlists_submission() - submit a batchbuffer for execution, Execlists style
852
 * execlists_submission() - submit a batchbuffer for execution, Execlists style
642
 * @dev: DRM device.
853
 * @dev: DRM device.
643
 * @file: DRM file.
854
 * @file: DRM file.
644
 * @ring: Engine Command Streamer to submit to.
855
 * @ring: Engine Command Streamer to submit to.
645
 * @ctx: Context to employ for this submission.
856
 * @ctx: Context to employ for this submission.
646
 * @args: execbuffer call arguments.
857
 * @args: execbuffer call arguments.
647
 * @vmas: list of vmas.
858
 * @vmas: list of vmas.
648
 * @batch_obj: the batchbuffer to submit.
859
 * @batch_obj: the batchbuffer to submit.
649
 * @exec_start: batchbuffer start virtual address pointer.
860
 * @exec_start: batchbuffer start virtual address pointer.
650
 * @flags: translated execbuffer call flags.
861
 * @dispatch_flags: translated execbuffer call flags.
651
 *
862
 *
652
 * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
863
 * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
653
 * away the submission details of the execbuffer ioctl call.
864
 * away the submission details of the execbuffer ioctl call.
654
 *
865
 *
655
 * Return: non-zero if the submission fails.
866
 * Return: non-zero if the submission fails.
656
 */
867
 */
657
int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
868
int intel_execlists_submission(struct i915_execbuffer_params *params,
658
			       struct intel_engine_cs *ring,
-
 
659
			       struct intel_context *ctx,
-
 
660
			       struct drm_i915_gem_execbuffer2 *args,
869
			       struct drm_i915_gem_execbuffer2 *args,
661
			       struct list_head *vmas,
870
			       struct list_head *vmas)
662
			       struct drm_i915_gem_object *batch_obj,
-
 
663
			       u64 exec_start, u32 flags)
-
 
664
{
871
{
-
 
872
	struct drm_device       *dev = params->dev;
-
 
873
	struct intel_engine_cs  *ring = params->ring;
665
	struct drm_i915_private *dev_priv = dev->dev_private;
874
	struct drm_i915_private *dev_priv = dev->dev_private;
666
	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
875
	struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf;
-
 
876
	u64 exec_start;
667
	int instp_mode;
877
	int instp_mode;
668
	u32 instp_mask;
878
	u32 instp_mask;
669
	int ret;
879
	int ret;
670
 
880
 
671
	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
881
	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
672
	instp_mask = I915_EXEC_CONSTANTS_MASK;
882
	instp_mask = I915_EXEC_CONSTANTS_MASK;
673
	switch (instp_mode) {
883
	switch (instp_mode) {
674
	case I915_EXEC_CONSTANTS_REL_GENERAL:
884
	case I915_EXEC_CONSTANTS_REL_GENERAL:
675
	case I915_EXEC_CONSTANTS_ABSOLUTE:
885
	case I915_EXEC_CONSTANTS_ABSOLUTE:
676
	case I915_EXEC_CONSTANTS_REL_SURFACE:
886
	case I915_EXEC_CONSTANTS_REL_SURFACE:
677
		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
887
		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
678
			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
888
			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
679
			return -EINVAL;
889
			return -EINVAL;
680
		}
890
		}
681
 
891
 
682
		if (instp_mode != dev_priv->relative_constants_mode) {
892
		if (instp_mode != dev_priv->relative_constants_mode) {
683
			if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
893
			if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
684
				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
894
				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
685
				return -EINVAL;
895
				return -EINVAL;
686
			}
896
			}
687
 
897
 
688
			/* The HW changed the meaning on this bit on gen6 */
898
			/* The HW changed the meaning on this bit on gen6 */
689
			instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
899
			instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
690
		}
900
		}
691
		break;
901
		break;
692
	default:
902
	default:
693
		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
903
		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
694
		return -EINVAL;
904
		return -EINVAL;
695
	}
905
	}
696
 
-
 
697
	if (args->num_cliprects != 0) {
-
 
698
		DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
-
 
699
		return -EINVAL;
-
 
700
	} else {
-
 
701
		if (args->DR4 == 0xffffffff) {
-
 
702
			DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
-
 
703
			args->DR4 = 0;
-
 
704
		}
-
 
705
 
-
 
706
		if (args->DR1 || args->DR4 || args->cliprects_ptr) {
-
 
707
			DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
-
 
708
			return -EINVAL;
-
 
709
		}
-
 
710
	}
-
 
711
 
906
 
712
	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
907
	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
713
		DRM_DEBUG("sol reset is gen7 only\n");
908
		DRM_DEBUG("sol reset is gen7 only\n");
714
		return -EINVAL;
909
		return -EINVAL;
715
	}
910
	}
716
 
911
 
717
	ret = execlists_move_to_gpu(ringbuf, vmas);
912
	ret = execlists_move_to_gpu(params->request, vmas);
718
	if (ret)
913
	if (ret)
719
		return ret;
914
		return ret;
720
 
915
 
721
	if (ring == &dev_priv->ring[RCS] &&
916
	if (ring == &dev_priv->ring[RCS] &&
722
	    instp_mode != dev_priv->relative_constants_mode) {
917
	    instp_mode != dev_priv->relative_constants_mode) {
723
		ret = intel_logical_ring_begin(ringbuf, 4);
918
		ret = intel_logical_ring_begin(params->request, 4);
724
		if (ret)
919
		if (ret)
725
			return ret;
920
			return ret;
726
 
921
 
727
		intel_logical_ring_emit(ringbuf, MI_NOOP);
922
		intel_logical_ring_emit(ringbuf, MI_NOOP);
728
		intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
923
		intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
729
		intel_logical_ring_emit(ringbuf, INSTPM);
924
		intel_logical_ring_emit(ringbuf, INSTPM);
730
		intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode);
925
		intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode);
731
		intel_logical_ring_advance(ringbuf);
926
		intel_logical_ring_advance(ringbuf);
732
 
927
 
733
		dev_priv->relative_constants_mode = instp_mode;
928
		dev_priv->relative_constants_mode = instp_mode;
734
	}
929
	}
-
 
930
 
-
 
931
	exec_start = params->batch_obj_vm_offset +
-
 
932
		     args->batch_start_offset;
735
 
933
 
736
	ret = ring->emit_bb_start(ringbuf, exec_start, flags);
934
	ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags);
737
	if (ret)
935
	if (ret)
738
		return ret;
936
		return ret;
-
 
937
 
-
 
938
	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
739
 
939
 
740
	i915_gem_execbuffer_move_to_active(vmas, ring);
940
	i915_gem_execbuffer_move_to_active(vmas, params->request);
741
	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
941
	i915_gem_execbuffer_retire_commands(params);
742
 
942
 
743
	return 0;
943
	return 0;
744
}
944
}
745
 
945
 
746
void intel_execlists_retire_requests(struct intel_engine_cs *ring)
946
void intel_execlists_retire_requests(struct intel_engine_cs *ring)
747
{
947
{
748
	struct intel_ctx_submit_request *req, *tmp;
948
	struct drm_i915_gem_request *req, *tmp;
749
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-
 
750
	unsigned long flags;
-
 
751
	struct list_head retired_list;
949
	struct list_head retired_list;
752
 
950
 
753
	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
951
	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
754
	if (list_empty(&ring->execlist_retired_req_list))
952
	if (list_empty(&ring->execlist_retired_req_list))
755
		return;
953
		return;
756
 
954
 
757
	INIT_LIST_HEAD(&retired_list);
955
	INIT_LIST_HEAD(&retired_list);
758
	spin_lock_irqsave(&ring->execlist_lock, flags);
956
	spin_lock_irq(&ring->execlist_lock);
759
	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
957
	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
760
	spin_unlock_irqrestore(&ring->execlist_lock, flags);
958
	spin_unlock_irq(&ring->execlist_lock);
761
 
959
 
762
	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
960
	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
763
		struct intel_context *ctx = req->ctx;
961
		struct intel_context *ctx = req->ctx;
764
		struct drm_i915_gem_object *ctx_obj =
962
		struct drm_i915_gem_object *ctx_obj =
765
				ctx->engine[ring->id].state;
963
				ctx->engine[ring->id].state;
766
 
964
 
767
		if (ctx_obj && (ctx != ring->default_context))
965
		if (ctx_obj && (ctx != ring->default_context))
768
			intel_lr_context_unpin(ring, ctx);
966
			intel_lr_context_unpin(req);
769
		intel_runtime_pm_put(dev_priv);
-
 
770
		i915_gem_context_unreference(req->ctx);
-
 
771
		list_del(&req->execlist_link);
967
		list_del(&req->execlist_link);
772
		kfree(req);
968
		i915_gem_request_unreference(req);
773
	}
969
	}
774
}
970
}
775
 
971
 
776
void intel_logical_ring_stop(struct intel_engine_cs *ring)
972
void intel_logical_ring_stop(struct intel_engine_cs *ring)
777
{
973
{
778
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
974
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
779
	int ret;
975
	int ret;
780
 
976
 
781
	if (!intel_ring_initialized(ring))
977
	if (!intel_ring_initialized(ring))
782
		return;
978
		return;
783
 
979
 
784
	ret = intel_ring_idle(ring);
980
	ret = intel_ring_idle(ring);
785
	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
981
	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
786
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
982
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
787
			  ring->name, ret);
983
			  ring->name, ret);
788
 
984
 
789
	/* TODO: Is this correct with Execlists enabled? */
985
	/* TODO: Is this correct with Execlists enabled? */
790
	I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
986
	I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
791
	if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
987
	if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
792
		DRM_ERROR("%s :timed out trying to stop ring\n", ring->name);
988
		DRM_ERROR("%s :timed out trying to stop ring\n", ring->name);
793
		return;
989
		return;
794
	}
990
	}
795
	I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
991
	I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
796
}
992
}
797
 
993
 
798
int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
994
int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
799
{
995
{
800
	struct intel_engine_cs *ring = ringbuf->ring;
996
	struct intel_engine_cs *ring = req->ring;
801
	int ret;
997
	int ret;
802
 
998
 
803
	if (!ring->gpu_caches_dirty)
999
	if (!ring->gpu_caches_dirty)
804
		return 0;
1000
		return 0;
805
 
1001
 
806
	ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
1002
	ret = ring->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);
807
	if (ret)
1003
	if (ret)
808
		return ret;
1004
		return ret;
809
 
1005
 
810
	ring->gpu_caches_dirty = false;
1006
	ring->gpu_caches_dirty = false;
811
	return 0;
1007
	return 0;
812
}
1008
}
813
 
-
 
814
/**
-
 
815
 * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
-
 
816
 * @ringbuf: Logical Ringbuffer to advance.
-
 
817
 *
-
 
818
 * The tail is updated in our logical ringbuffer struct, not in the actual context. What
-
 
819
 * really happens during submission is that the context and current tail will be placed
-
 
820
 * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
-
 
821
 * point, the tail *inside* the context is updated and the ELSP written to.
-
 
822
 */
1009
 
823
void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
-
 
824
{
1010
static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
825
	struct intel_engine_cs *ring = ringbuf->ring;
-
 
826
	struct intel_context *ctx = ringbuf->FIXME_lrc_ctx;
-
 
827
 
-
 
828
	intel_logical_ring_advance(ringbuf);
-
 
829
 
-
 
830
	if (intel_ring_stopped(ring))
-
 
831
		return;
-
 
832
 
-
 
833
	execlists_context_queue(ring, ctx, ringbuf->tail);
-
 
834
}
-
 
835
 
-
 
836
static int intel_lr_context_pin(struct intel_engine_cs *ring,
1011
		struct drm_i915_gem_object *ctx_obj,
837
		struct intel_context *ctx)
1012
		struct intel_ringbuffer *ringbuf)
838
{
1013
{
839
	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
1014
	struct drm_device *dev = ring->dev;
840
	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
1015
	struct drm_i915_private *dev_priv = dev->dev_private;
841
	int ret = 0;
1016
	int ret = 0;
842
 
1017
 
843
	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1018
	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
844
	if (ctx->engine[ring->id].unpin_count++ == 0) {
-
 
845
		ret = i915_gem_obj_ggtt_pin(ctx_obj,
1019
	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
846
				GEN8_LR_CONTEXT_ALIGN, 0);
1020
			PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
847
		if (ret)
1021
	if (ret)
848
			goto reset_unpin_count;
1022
		return ret;
849
 
1023
 
850
		ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
1024
	ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
851
		if (ret)
1025
	if (ret)
852
			goto unpin_ctx_obj;
1026
		goto unpin_ctx_obj;
-
 
1027
 
-
 
1028
	ctx_obj->dirty = true;
853
	}
1029
 
-
 
1030
	/* Invalidate GuC TLB. */
-
 
1031
	if (i915.enable_guc_submission)
-
 
1032
		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
854
 
1033
 
855
	return ret;
1034
	return ret;
856
 
1035
 
857
unpin_ctx_obj:
1036
unpin_ctx_obj:
858
	i915_gem_object_ggtt_unpin(ctx_obj);
1037
	i915_gem_object_ggtt_unpin(ctx_obj);
859
reset_unpin_count:
-
 
860
	ctx->engine[ring->id].unpin_count = 0;
-
 
861
 
1038
 
862
	return ret;
1039
	return ret;
863
}
1040
}
864
 
1041
 
865
void intel_lr_context_unpin(struct intel_engine_cs *ring,
-
 
866
		struct intel_context *ctx)
1042
static int intel_lr_context_pin(struct drm_i915_gem_request *rq)
-
 
1043
{
-
 
1044
	int ret = 0;
867
{
1045
	struct intel_engine_cs *ring = rq->ring;
868
	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
1046
	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-
 
1047
	struct intel_ringbuffer *ringbuf = rq->ringbuf;
-
 
1048
 
-
 
1049
	if (rq->ctx->engine[ring->id].pin_count++ == 0) {
-
 
1050
		ret = intel_lr_context_do_pin(ring, ctx_obj, ringbuf);
-
 
1051
		if (ret)
-
 
1052
			goto reset_pin_count;
-
 
1053
	}
-
 
1054
	return ret;
-
 
1055
 
-
 
1056
reset_pin_count:
-
 
1057
	rq->ctx->engine[ring->id].pin_count = 0;
-
 
1058
	return ret;
-
 
1059
}
-
 
1060
 
-
 
1061
void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
-
 
1062
{
-
 
1063
	struct intel_engine_cs *ring = rq->ring;
-
 
1064
	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
869
	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
1065
	struct intel_ringbuffer *ringbuf = rq->ringbuf;
870
 
1066
 
871
	if (ctx_obj) {
1067
	if (ctx_obj) {
872
		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1068
		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
873
		if (--ctx->engine[ring->id].unpin_count == 0) {
1069
		if (--rq->ctx->engine[ring->id].pin_count == 0) {
874
			intel_unpin_ringbuffer_obj(ringbuf);
1070
			intel_unpin_ringbuffer_obj(ringbuf);
875
			i915_gem_object_ggtt_unpin(ctx_obj);
1071
			i915_gem_object_ggtt_unpin(ctx_obj);
876
		}
1072
		}
877
	}
1073
	}
878
}
1074
}
879
 
1075
 
880
static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
-
 
881
				    struct intel_context *ctx)
1076
static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
882
{
1077
{
-
 
1078
	int ret, i;
-
 
1079
	struct intel_engine_cs *ring = req->ring;
-
 
1080
	struct intel_ringbuffer *ringbuf = req->ringbuf;
-
 
1081
	struct drm_device *dev = ring->dev;
-
 
1082
	struct drm_i915_private *dev_priv = dev->dev_private;
883
	int ret;
1083
	struct i915_workarounds *w = &dev_priv->workarounds;
884
 
1084
 
885
	if (ring->outstanding_lazy_seqno)
1085
	if (WARN_ON_ONCE(w->count == 0))
886
		return 0;
1086
		return 0;
887
 
-
 
888
	if (ring->preallocated_lazy_request == NULL) {
-
 
889
		struct drm_i915_gem_request *request;
1087
 
890
 
1088
	ring->gpu_caches_dirty = true;
891
		request = kmalloc(sizeof(*request), GFP_KERNEL);
-
 
892
		if (request == NULL)
1089
	ret = logical_ring_flush_all_caches(req);
893
			return -ENOMEM;
1090
	if (ret)
894
 
-
 
895
		if (ctx != ring->default_context) {
1091
		return ret;
-
 
1092
 
-
 
1093
	ret = intel_logical_ring_begin(req, w->count * 2 + 2);
-
 
1094
	if (ret)
-
 
1095
		return ret;
-
 
1096
 
896
			ret = intel_lr_context_pin(ring, ctx);
1097
	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count));
897
			if (ret) {
-
 
-
 
1098
	for (i = 0; i < w->count; i++) {
898
				kfree(request);
-
 
899
				return ret;
-
 
900
			}
-
 
901
		}
-
 
902
 
-
 
903
		/* Hold a reference to the context this request belongs to
1099
		intel_logical_ring_emit(ringbuf, w->reg[i].addr);
904
		 * (we will need it when the time comes to emit/retire the
1100
		intel_logical_ring_emit(ringbuf, w->reg[i].value);
-
 
1101
	}
905
		 * request).
1102
	intel_logical_ring_emit(ringbuf, MI_NOOP);
-
 
1103
 
906
		 */
1104
	intel_logical_ring_advance(ringbuf);
907
		request->ctx = ctx;
1105
 
908
		i915_gem_context_reference(request->ctx);
1106
	ring->gpu_caches_dirty = true;
909
 
1107
	ret = logical_ring_flush_all_caches(req);
910
		ring->preallocated_lazy_request = request;
-
 
911
	}
1108
	if (ret)
-
 
1109
		return ret;
912
 
1110
 
913
	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
1111
	return 0;
-
 
1112
}
914
}
1113
 
915
 
-
 
916
static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
-
 
917
				     int bytes)
-
 
-
 
1114
#define wa_ctx_emit(batch, index, cmd)					\
-
 
1115
	do {								\
-
 
1116
		int __index = (index)++;				\
-
 
1117
		if (WARN_ON(__index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
-
 
1118
			return -ENOSPC;					\
-
 
1119
		}							\
-
 
1120
		batch[__index] = (cmd);					\
-
 
1121
	} while (0)
-
 
1122
 
-
 
1123
 
-
 
1124
/*
-
 
1125
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
-
 
1126
 * PIPE_CONTROL instruction. This is required for the flush to happen correctly
918
{
1127
 * but there is a slight complication as this is applied in WA batch where the
-
 
1128
 * values are only initialized once so we cannot take register value at the
-
 
1129
 * beginning and reuse it further; hence we save its value to memory, upload a
-
 
1130
 * constant value with bit21 set and then we restore it back with the saved value.
919
	struct intel_engine_cs *ring = ringbuf->ring;
1131
 * To simplify the WA, a constant value is formed by using the default value
920
	struct drm_i915_gem_request *request;
1132
 * of this register. This shouldn't be a problem because we are only modifying
-
 
1133
 * it for a short period and this batch is non-preemptible. We can of course
-
 
1134
 * use additional instructions that read the actual value of the register
-
 
1135
 * at that time and set our bit of interest but it makes the WA complicated.
921
	u32 seqno = 0;
1136
 *
-
 
1137
 * This WA is also required for Gen9 so extracting as a function avoids
-
 
1138
 * code duplication.
-
 
1139
 */
-
 
1140
static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *ring,
-
 
1141
						uint32_t *const batch,
-
 
1142
						uint32_t index)
-
 
1143
{
922
	int ret;
1144
	uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
923
 
1145
 
924
	if (ringbuf->last_retired_head != -1) {
1146
	/*
925
		ringbuf->head = ringbuf->last_retired_head;
1147
	 * WaDisableLSQCROPERFforOCL:skl
926
		ringbuf->last_retired_head = -1;
1148
	 * This WA is implemented in skl_init_clock_gating() but since
927
 
-
 
928
		ringbuf->space = intel_ring_space(ringbuf);
-
 
-
 
1149
	 * this batch updates GEN8_L3SQCREG4 with default value we need to
929
		if (ringbuf->space >= bytes)
1150
	 * set this bit here to retain the WA during flush.
930
			return 0;
1151
	 */
-
 
1152
	if (IS_SKYLAKE(ring->dev) && INTEL_REVID(ring->dev) <= SKL_REVID_E0)
-
 
1153
		l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
-
 
1154
 
931
	}
1155
	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
932
 
1156
				   MI_SRM_LRM_GLOBAL_GTT));
-
 
1157
	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
933
	list_for_each_entry(request, &ring->request_list, list) {
1158
	wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
-
 
1159
	wa_ctx_emit(batch, index, 0);
-
 
1160
 
934
		if (__intel_ring_space(request->tail, ringbuf->tail,
1161
	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
935
				       ringbuf->size) >= bytes) {
1162
	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
936
			seqno = request->seqno;
1163
	wa_ctx_emit(batch, index, l3sqc4_flush);
937
			break;
-
 
938
		}
1164
 
939
	}
1165
	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
940
 
1166
	wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL |
941
	if (seqno == 0)
1167
				   PIPE_CONTROL_DC_FLUSH_ENABLE));
-
 
1168
	wa_ctx_emit(batch, index, 0);
942
		return -ENOSPC;
1169
	wa_ctx_emit(batch, index, 0);
943
 
-
 
944
	ret = i915_wait_seqno(ring, seqno);
-
 
945
	if (ret)
1170
	wa_ctx_emit(batch, index, 0);
946
		return ret;
-
 
947
 
1171
	wa_ctx_emit(batch, index, 0);
948
	i915_gem_retire_requests_ring(ring);
1172
 
949
	ringbuf->head = ringbuf->last_retired_head;
1173
	wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
950
	ringbuf->last_retired_head = -1;
1174
				   MI_SRM_LRM_GLOBAL_GTT));
-
 
1175
	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
-
 
1176
	wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
-
 
1177
	wa_ctx_emit(batch, index, 0);
-
 
1178
 
-
 
1179
	return index;
-
 
1180
}
-
 
1181
 
-
 
1182
static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
-
 
1183
				    uint32_t offset,
-
 
1184
				    uint32_t start_alignment)
-
 
1185
{
-
 
1186
	return wa_ctx->offset = ALIGN(offset, start_alignment);
-
 
1187
}
-
 
1188
 
-
 
1189
static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx,
-
 
1190
			     uint32_t offset,
-
 
1191
			     uint32_t size_alignment)
951
 
1192
{
952
	ringbuf->space = intel_ring_space(ringbuf);
1193
	wa_ctx->size = offset - wa_ctx->offset;
-
 
1194
 
-
 
1195
	WARN(wa_ctx->size % size_alignment,
-
 
1196
	     "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n",
-
 
1197
	     wa_ctx->size, size_alignment);
-
 
1198
	return 0;
-
 
1199
}
-
 
1200
 
-
 
1201
/**
-
 
1202
 * gen8_init_indirectctx_bb() - initialize indirect ctx batch with WA
-
 
1203
 *
-
 
1204
 * @ring: only applicable for RCS
-
 
1205
 * @wa_ctx: structure representing wa_ctx
-
 
1206
 *  offset: specifies start of the batch, should be cache-aligned. This is updated
-
 
1207
 *    with the offset value received as input.
-
 
1208
 *  size: size of the batch in DWORDS but HW expects in terms of cachelines
-
 
1209
 * @batch: page in which WA are loaded
953
	return 0;
1210
 * @offset: This field specifies the start of the batch, it should be
-
 
1211
 *  cache-aligned otherwise it is adjusted accordingly.
-
 
1212
 *  Typically we only have one indirect_ctx and per_ctx batch buffer which are
-
 
1213
 *  initialized at the beginning and shared across all contexts but this field
-
 
1214
 *  helps us to have multiple batches at different offsets and select them based
-
 
1215
 *  on a criterion. At the moment this batch always starts at the beginning of the page
954
}
1216
 *  and at this point we don't have multiple wa_ctx batch buffers.
-
 
1217
 *
-
 
1218
 *  The number of WA applied are not known at the beginning; we use this field
-
 
1219
 *  to return the number of DWORDS written.
-
 
1220
 *
955
 
1221
 *  It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
-
 
1222
 *  so it adds NOOPs as padding to make it cacheline aligned.
956
static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
1223
 *  MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
-
 
1224
 *  make a complete batch buffer.
-
 
1225
 *
-
 
1226
 * Return: non-zero if we exceed the PAGE_SIZE limit.
-
 
1227
 */
-
 
1228
 
-
 
1229
static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
957
				       int bytes)
1230
				    struct i915_wa_ctx_bb *wa_ctx,
-
 
1231
				    uint32_t *const batch,
-
 
1232
				    uint32_t *offset)
-
 
1233
{
-
 
1234
	uint32_t scratch_addr;
-
 
1235
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
-
 
1236
 
-
 
1237
	/* WaDisableCtxRestoreArbitration:bdw,chv */
-
 
1238
	wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
-
 
1239
 
-
 
1240
	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
-
 
1241
	if (IS_BROADWELL(ring->dev)) {
-
 
1242
		int rc = gen8_emit_flush_coherentl3_wa(ring, batch, index);
958
{
1243
		if (rc < 0)
-
 
1244
			return rc;
-
 
1245
		index = rc;
-
 
1246
	}
-
 
1247
 
-
 
1248
	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
-
 
1249
	/* Actual scratch location is at 128 bytes offset */
-
 
1250
	scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
-
 
1251
 
959
	struct intel_engine_cs *ring = ringbuf->ring;
-
 
960
	struct drm_device *dev = ring->dev;
-
 
961
	struct drm_i915_private *dev_priv = dev->dev_private;
1252
	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
962
	unsigned long end;
-
 
963
	int ret;
-
 
964
 
-
 
965
	ret = logical_ring_wait_request(ringbuf, bytes);
1253
	wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
-
 
1254
				   PIPE_CONTROL_GLOBAL_GTT_IVB |
-
 
1255
				   PIPE_CONTROL_CS_STALL |
-
 
1256
				   PIPE_CONTROL_QW_WRITE));
-
 
1257
	wa_ctx_emit(batch, index, scratch_addr);
-
 
1258
	wa_ctx_emit(batch, index, 0);
-
 
1259
	wa_ctx_emit(batch, index, 0);
-
 
1260
	wa_ctx_emit(batch, index, 0);
-
 
1261
 
-
 
1262
	/* Pad to end of cacheline */
-
 
1263
	while (index % CACHELINE_DWORDS)
-
 
1264
		wa_ctx_emit(batch, index, MI_NOOP);
-
 
1265
 
-
 
1266
	/*
-
 
1267
	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
-
 
1268
	 * execution depends on the length specified in terms of cache lines
-
 
1269
	 * in the register CTX_RCS_INDIRECT_CTX
-
 
1270
	 */
-
 
1271
 
-
 
1272
	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
-
 
1273
}
966
	if (ret != -ENOSPC)
1274
 
-
 
1275
/**
-
 
1276
 * gen8_init_perctx_bb() - initialize per ctx batch with WA
967
		return ret;
1277
 *
968
 
1278
 * @ring: only applicable for RCS
969
	/* Force the context submission in case we have been skipping it */
-
 
970
	intel_logical_ring_advance_and_submit(ringbuf);
-
 
971
 
1279
 * @wa_ctx: structure representing wa_ctx
972
	/* With GEM the hangcheck timer should kick us out of the loop,
-
 
973
	 * leaving it early runs the risk of corrupting GEM state (due
-
 
974
	 * to running on almost untested codepaths). But on resume
-
 
975
	 * timers don't work yet, so prevent a complete hang in that
-
 
976
	 * case by choosing an insanely large timeout. */
1280
 *  offset: specifies start of the batch, should be cache-aligned.
977
	end = jiffies + 60 * HZ;
1281
 *  size: size of the batch in DWORDS but HW expects in terms of cachelines
978
 
1282
 * @batch: page in which WA are loaded
-
 
1283
 * @offset: This field specifies the start of this batch.
-
 
1284
 *   This batch is started immediately after indirect_ctx batch. Since we ensure
-
 
1285
 *   that indirect_ctx ends on a cacheline this batch is aligned automatically.
979
	do {
1286
 *
980
		ringbuf->head = I915_READ_HEAD(ring);
1287
 *   The number of DWORDS written is returned using this field.
981
		ringbuf->space = intel_ring_space(ringbuf);
1288
 *
-
 
1289
 *  This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding
982
		if (ringbuf->space >= bytes) {
1290
 *  to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant.
-
 
1291
 */
-
 
1292
static int gen8_init_perctx_bb(struct intel_engine_cs *ring,
983
			ret = 0;
1293
			       struct i915_wa_ctx_bb *wa_ctx,
-
 
1294
			       uint32_t *const batch,
-
 
1295
			       uint32_t *offset)
984
			break;
1296
{
985
		}
1297
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
-
 
1298
 
-
 
1299
	/* WaDisableCtxRestoreArbitration:bdw,chv */
-
 
1300
	wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
-
 
1301
 
-
 
1302
	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
-
 
1303
 
-
 
1304
	return wa_ctx_end(wa_ctx, *offset = index, 1);
986
 
1305
}
987
		msleep(1);
1306
 
-
 
1307
static int gen9_init_indirectctx_bb(struct intel_engine_cs *ring,
988
 
1308
				    struct i915_wa_ctx_bb *wa_ctx,
989
		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
1309
				    uint32_t *const batch,
-
 
1310
				    uint32_t *offset)
990
					   dev_priv->mm.interruptible);
1311
{
-
 
1312
	int ret;
-
 
1313
	struct drm_device *dev = ring->dev;
-
 
1314
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
-
 
1315
 
-
 
1316
	/* WaDisableCtxRestoreArbitration:skl,bxt */
-
 
1317
	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_D0)) ||
991
		if (ret)
1318
	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0)))
-
 
1319
		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
992
			break;
1320
 
-
 
1321
	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */
-
 
1322
	ret = gen8_emit_flush_coherentl3_wa(ring, batch, index);
-
 
1323
	if (ret < 0)
-
 
1324
		return ret;
-
 
1325
	index = ret;
-
 
1326
 
-
 
1327
	/* Pad to end of cacheline */
-
 
1328
	while (index % CACHELINE_DWORDS)
993
 
1329
		wa_ctx_emit(batch, index, MI_NOOP);
994
		if (time_after(jiffies, end)) {
1330
 
995
			ret = -EBUSY;
1331
	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
996
			break;
1332
}
997
		}
1333
 
998
	} while (1);
1334
static int gen9_init_perctx_bb(struct intel_engine_cs *ring,
999
 
1335
			       struct i915_wa_ctx_bb *wa_ctx,
1000
	return ret;
1336
			       uint32_t *const batch,
1001
}
1337
			       uint32_t *offset)
1002
 
1338
{
1003
static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
1339
	struct drm_device *dev = ring->dev;
1004
{
1340
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
1005
	uint32_t __iomem *virt;
1341
 
1006
	int rem = ringbuf->size - ringbuf->tail;
1342
	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
1007
 
1343
	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_B0)) ||
1008
	if (ringbuf->space < rem) {
1344
	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0))) {
1009
		int ret = logical_ring_wait_for_space(ringbuf, rem);
1345
		wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
1010
 
1346
		wa_ctx_emit(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0);
1011
		if (ret)
1347
		wa_ctx_emit(batch, index,
1012
			return ret;
1348
			    _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING));
1013
	}
1349
		wa_ctx_emit(batch, index, MI_NOOP);
1014
 
1350
	}
1015
	virt = ringbuf->virtual_start + ringbuf->tail;
1351
 
1016
	rem /= 4;
1352
	/* WaDisableCtxRestoreArbitration:skl,bxt */
1017
	while (rem--)
1353
	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_D0)) ||
1018
		iowrite32(MI_NOOP, virt++);
1354
	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0)))
1019
 
1355
		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
1020
	ringbuf->tail = 0;
1356
 
1021
	ringbuf->space = intel_ring_space(ringbuf);
1357
	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
1022
 
1358
 
1023
	return 0;
1359
	return wa_ctx_end(wa_ctx, *offset = index, 1);
1024
}
1360
}
1025
 
1361
 
1026
static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
1362
static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *ring, u32 size)
1027
{
1363
{
1028
	int ret;
1364
	int ret;
1029
 
1365
 
1030
	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
1366
	ring->wa_ctx.obj = i915_gem_alloc_object(ring->dev, PAGE_ALIGN(size));
1031
		ret = logical_ring_wrap_buffer(ringbuf);
1367
	if (!ring->wa_ctx.obj) {
1032
		if (unlikely(ret))
1368
		DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
1033
			return ret;
1369
		return -ENOMEM;
1034
	}
1370
	}
-
 
1371
 
1035
 
1372
	ret = i915_gem_obj_ggtt_pin(ring->wa_ctx.obj, PAGE_SIZE, 0);
1036
	if (unlikely(ringbuf->space < bytes)) {
1373
	if (ret) {
-
 
1374
		DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
1037
		ret = logical_ring_wait_for_space(ringbuf, bytes);
1375
				 ret);
1038
		if (unlikely(ret))
1376
		drm_gem_object_unreference(&ring->wa_ctx.obj->base);
1039
			return ret;
1377
		return ret;
1040
	}
1378
	}
1041
 
1379
 
1042
	return 0;
1380
	return 0;
1043
}
1381
}
1044
 
-
 
1045
/**
-
 
1046
 * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
-
 
1047
 *
-
 
1048
 * @ringbuf: Logical ringbuffer.
-
 
1049
 * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
-
 
1050
 *
-
 
1051
 * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
-
 
1052
 * be wrapped, or wait a bit for the tail to be updated). This function takes care of that
-
 
1053
 * and also preallocates a request (every workload submission is still mediated through
-
 
1054
 * requests, same as it did with legacy ringbuffer submission).
-
 
1055
 *
-
 
1056
 * Return: non-zero if the ringbuffer is not ready to be written to.
-
 
1057
 */
1382
 
1058
int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
1383
static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *ring)
1059
{
1384
{
1060
	struct intel_engine_cs *ring = ringbuf->ring;
1385
	if (ring->wa_ctx.obj) {
1061
	struct drm_device *dev = ring->dev;
1386
		i915_gem_object_ggtt_unpin(ring->wa_ctx.obj);
1062
	struct drm_i915_private *dev_priv = dev->dev_private;
1387
		drm_gem_object_unreference(&ring->wa_ctx.obj->base);
1063
	int ret;
1388
		ring->wa_ctx.obj = NULL;
1064
 
-
 
1065
	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-
 
1066
				   dev_priv->mm.interruptible);
1389
	}
1067
	if (ret)
-
 
1068
		return ret;
1390
}
-
 
1391
 
1069
 
1392
static int intel_init_workaround_bb(struct intel_engine_cs *ring)
-
 
1393
{
1070
	ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
1394
	int ret;
-
 
1395
	uint32_t *batch;
-
 
1396
	uint32_t offset;
1071
	if (ret)
-
 
1072
		return ret;
-
 
1073
 
-
 
1074
	/* Preallocate the olr before touching the ring */
1397
	struct page *page;
1075
	ret = logical_ring_alloc_seqno(ring, ringbuf->FIXME_lrc_ctx);
1398
	struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
-
 
1399
 
-
 
1400
	WARN_ON(ring->id != RCS);
-
 
1401
 
1076
	if (ret)
1402
	/* update this when WA for higher Gen are added */
1077
		return ret;
1403
	if (INTEL_INFO(ring->dev)->gen > 9) {
1078
 
1404
		DRM_ERROR("WA batch buffer is not initialized for Gen%d\n",
1079
	ringbuf->space -= num_dwords * sizeof(uint32_t);
1405
			  INTEL_INFO(ring->dev)->gen);
1080
	return 0;
1406
		return 0;
1081
}
1407
	}
1082
 
1408
 
1083
static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
1409
	/* some WA perform writes to scratch page, ensure it is valid */
1084
					       struct intel_context *ctx)
-
 
1085
{
-
 
1086
	int ret, i;
1410
	if (ring->scratch.obj == NULL) {
1087
	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
1411
		DRM_ERROR("scratch page not allocated for %s\n", ring->name);
1088
	struct drm_device *dev = ring->dev;
-
 
1089
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
-
 
1412
		return -EINVAL;
1090
	struct i915_workarounds *w = &dev_priv->workarounds;
1413
	}
-
 
1414
 
-
 
1415
	ret = lrc_setup_wa_ctx_obj(ring, PAGE_SIZE);
1091
 
1416
	if (ret) {
-
 
1417
		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
-
 
1418
		return ret;
1092
	if (WARN_ON(w->count == 0))
1419
	}
-
 
1420
 
-
 
1421
	page = i915_gem_object_get_page(wa_ctx->obj, 0);
-
 
1422
	batch = kmap_atomic(page);
1093
		return 0;
1423
	offset = 0;
-
 
1424
 
-
 
1425
	if (INTEL_INFO(ring->dev)->gen == 8) {
-
 
1426
		ret = gen8_init_indirectctx_bb(ring,
1094
 
1427
					       &wa_ctx->indirect_ctx,
1095
	ring->gpu_caches_dirty = true;
1428
					       batch,
1096
	ret = logical_ring_flush_all_caches(ringbuf);
1429
					       &offset);
-
 
1430
		if (ret)
-
 
1431
			goto out;
-
 
1432
 
1097
	if (ret)
1433
		ret = gen8_init_perctx_bb(ring,
1098
		return ret;
1434
					  &wa_ctx->per_ctx,
-
 
1435
					  batch,
-
 
1436
					  &offset);
-
 
1437
		if (ret)
-
 
1438
			goto out;
-
 
1439
	} else if (INTEL_INFO(ring->dev)->gen == 9) {
-
 
1440
		ret = gen9_init_indirectctx_bb(ring,
-
 
1441
					       &wa_ctx->indirect_ctx,
1099
 
1442
					       batch,
1100
	ret = intel_logical_ring_begin(ringbuf, w->count * 2 + 2);
1443
					       &offset);
-
 
1444
		if (ret)
1101
	if (ret)
1445
			goto out;
-
 
1446
 
1102
		return ret;
1447
		ret = gen9_init_perctx_bb(ring,
1103
 
1448
					  &wa_ctx->per_ctx,
1104
	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count));
-
 
1105
	for (i = 0; i < w->count; i++) {
-
 
1106
		intel_logical_ring_emit(ringbuf, w->reg[i].addr);
1449
					  batch,
1107
		intel_logical_ring_emit(ringbuf, w->reg[i].value);
1450
					  &offset);
1108
	}
-
 
1109
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1451
		if (ret)
1110
 
1452
			goto out;
1111
	intel_logical_ring_advance(ringbuf);
1453
	}
1112
 
1454
 
1113
	ring->gpu_caches_dirty = true;
1455
out:
1114
	ret = logical_ring_flush_all_caches(ringbuf);
1456
	kunmap_atomic(batch);
1115
	if (ret)
1457
	if (ret)
1116
		return ret;
1458
		lrc_destroy_wa_ctx_obj(ring);
1117
 
1459
 
1118
	return 0;
1460
	return ret;
1119
}
1461
}
1120
 
1462
 
1121
static int gen8_init_common_ring(struct intel_engine_cs *ring)
1463
static int gen8_init_common_ring(struct intel_engine_cs *ring)
1122
{
1464
{
1123
	struct drm_device *dev = ring->dev;
1465
	struct drm_device *dev = ring->dev;
1124
	struct drm_i915_private *dev_priv = dev->dev_private;
1466
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
1467
	u8 next_context_status_buffer_hw;
-
 
1468
 
-
 
1469
	lrc_setup_hardware_status_page(ring,
-
 
1470
				ring->default_context->engine[ring->id].state);
1125
 
1471
 
1126
	I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
1472
	I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
1127
	I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
1473
	I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
-
 
1474
 
-
 
1475
	if (ring->status_page.obj) {
-
 
1476
		I915_WRITE(RING_HWS_PGA(ring->mmio_base),
-
 
1477
			   (u32)ring->status_page.gfx_addr);
-
 
1478
		POSTING_READ(RING_HWS_PGA(ring->mmio_base));
-
 
1479
	}
1128
 
1480
 
1129
	I915_WRITE(RING_MODE_GEN7(ring),
1481
	I915_WRITE(RING_MODE_GEN7(ring),
1130
		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
1482
		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
1131
		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
1483
		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
1132
	POSTING_READ(RING_MODE_GEN7(ring));
1484
	POSTING_READ(RING_MODE_GEN7(ring));
-
 
1485
 
-
 
1486
	/*
-
 
1487
	 * Instead of resetting the Context Status Buffer (CSB) read pointer to
-
 
1488
	 * zero, we need to read the write pointer from hardware and use its
-
 
1489
	 * value because "this register is power context save restored".
-
 
1490
	 * Effectively, these states have been observed:
-
 
1491
	 *
-
 
1492
	 *      | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) |
-
 
1493
	 * BDW  | CSB regs not reset       | CSB regs reset       |
-
 
1494
	 * CHT  | CSB regs not reset       | CSB regs not reset   |
-
 
1495
	 */
-
 
1496
	next_context_status_buffer_hw = (I915_READ(RING_CONTEXT_STATUS_PTR(ring))
-
 
1497
						   & GEN8_CSB_PTR_MASK);
-
 
1498
 
-
 
1499
	/*
-
 
1500
	 * When the CSB registers are reset (also after power-up / gpu reset),
-
 
1501
	 * CSB write pointer is set to all 1's, which is not valid, use '5' in
-
 
1502
	 * this special case, so the first element read is CSB[0].
-
 
1503
	 */
-
 
1504
	if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK)
-
 
1505
		next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1);
-
 
1506
 
-
 
1507
	ring->next_context_status_buffer = next_context_status_buffer_hw;
1133
	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name);
1508
	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name);
1134
 
1509
 
1135
	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
1510
	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
1136
 
1511
 
1137
	return 0;
1512
	return 0;
1138
}
1513
}
1139
 
1514
 
1140
static int gen8_init_render_ring(struct intel_engine_cs *ring)
1515
static int gen8_init_render_ring(struct intel_engine_cs *ring)
1141
{
1516
{
1142
	struct drm_device *dev = ring->dev;
1517
	struct drm_device *dev = ring->dev;
1143
	struct drm_i915_private *dev_priv = dev->dev_private;
1518
	struct drm_i915_private *dev_priv = dev->dev_private;
1144
	int ret;
1519
	int ret;
1145
 
1520
 
1146
	ret = gen8_init_common_ring(ring);
1521
	ret = gen8_init_common_ring(ring);
1147
	if (ret)
1522
	if (ret)
1148
		return ret;
1523
		return ret;
1149
 
1524
 
1150
	/* We need to disable the AsyncFlip performance optimisations in order
1525
	/* We need to disable the AsyncFlip performance optimisations in order
1151
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
1526
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
1152
	 * programmed to '1' on all products.
1527
	 * programmed to '1' on all products.
1153
	 *
1528
	 *
1154
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
1529
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
1155
	 */
1530
	 */
1156
	I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
1531
	I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
-
 
1532
 
-
 
1533
	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
-
 
1534
 
-
 
1535
	return init_workarounds_ring(ring);
-
 
1536
}
-
 
1537
 
-
 
1538
static int gen9_init_render_ring(struct intel_engine_cs *ring)
-
 
1539
{
-
 
1540
	int ret;
1157
 
1541
 
1158
	ret = intel_init_pipe_control(ring);
1542
	ret = gen8_init_common_ring(ring);
1159
	if (ret)
1543
	if (ret)
1160
		return ret;
1544
		return ret;
1161
 
-
 
1162
	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
-
 
1163
 
1545
 
1164
	return init_workarounds_ring(ring);
1546
	return init_workarounds_ring(ring);
1165
}
1547
}
1166
 
1548
 
1167
static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf,
-
 
1168
			      u64 offset, unsigned flags)
1549
static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
-
 
1550
{
-
 
1551
	struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
-
 
1552
	struct intel_engine_cs *ring = req->ring;
-
 
1553
	struct intel_ringbuffer *ringbuf = req->ringbuf;
-
 
1554
	const int num_lri_cmds = GEN8_LEGACY_PDPES * 2;
-
 
1555
	int i, ret;
-
 
1556
 
-
 
1557
	ret = intel_logical_ring_begin(req, num_lri_cmds * 2 + 2);
-
 
1558
	if (ret)
-
 
1559
		return ret;
-
 
1560
 
-
 
1561
	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(num_lri_cmds));
-
 
1562
	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
-
 
1563
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
-
 
1564
 
-
 
1565
		intel_logical_ring_emit(ringbuf, GEN8_RING_PDP_UDW(ring, i));
-
 
1566
		intel_logical_ring_emit(ringbuf, upper_32_bits(pd_daddr));
-
 
1567
		intel_logical_ring_emit(ringbuf, GEN8_RING_PDP_LDW(ring, i));
-
 
1568
		intel_logical_ring_emit(ringbuf, lower_32_bits(pd_daddr));
-
 
1569
	}
-
 
1570
 
-
 
1571
	intel_logical_ring_emit(ringbuf, MI_NOOP);
-
 
1572
	intel_logical_ring_advance(ringbuf);
-
 
1573
 
-
 
1574
	return 0;
-
 
1575
}
-
 
1576
 
-
 
1577
static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
-
 
1578
			      u64 offset, unsigned dispatch_flags)
-
 
1579
{
1169
{
1580
	struct intel_ringbuffer *ringbuf = req->ringbuf;
1170
	bool ppgtt = !(flags & I915_DISPATCH_SECURE);
1581
	bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE);
-
 
1582
	int ret;
-
 
1583
 
-
 
1584
	/* Don't rely in hw updating PDPs, specially in lite-restore.
-
 
1585
	 * Ideally, we should set Force PD Restore in ctx descriptor,
-
 
1586
	 * but we can't. Force Restore would be a second option, but
-
 
1587
	 * it is unsafe in case of lite-restore (because the ctx is
-
 
1588
	 * not idle). PML4 is allocated during ppgtt init so this is
-
 
1589
	 * not needed in 48-bit.*/
-
 
1590
	if (req->ctx->ppgtt &&
-
 
1591
	    (intel_ring_flag(req->ring) & req->ctx->ppgtt->pd_dirty_rings)) {
-
 
1592
		if (!USES_FULL_48BIT_PPGTT(req->i915) &&
-
 
1593
		    !intel_vgpu_active(req->i915->dev)) {
-
 
1594
			ret = intel_logical_ring_emit_pdps(req);
-
 
1595
			if (ret)
-
 
1596
				return ret;
-
 
1597
		}
-
 
1598
 
-
 
1599
		req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring);
1171
	int ret;
1600
	}
1172
 
1601
 
1173
	ret = intel_logical_ring_begin(ringbuf, 4);
1602
	ret = intel_logical_ring_begin(req, 4);
1174
	if (ret)
1603
	if (ret)
1175
		return ret;
1604
		return ret;
1176
 
1605
 
1177
	/* FIXME(BDW): Address space and security selectors. */
1606
	/* FIXME(BDW): Address space and security selectors. */
1178
	intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
1607
	intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 |
-
 
1608
				(ppgtt<<8) |
-
 
1609
				(dispatch_flags & I915_DISPATCH_RS ?
-
 
1610
				 MI_BATCH_RESOURCE_STREAMER : 0));
1179
	intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
1611
	intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
1180
	intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
1612
	intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
1181
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1613
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1182
	intel_logical_ring_advance(ringbuf);
1614
	intel_logical_ring_advance(ringbuf);
1183
 
1615
 
1184
	return 0;
1616
	return 0;
1185
}
1617
}
1186
 
1618
 
1187
static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring)
1619
static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring)
1188
{
1620
{
1189
	struct drm_device *dev = ring->dev;
1621
	struct drm_device *dev = ring->dev;
1190
	struct drm_i915_private *dev_priv = dev->dev_private;
1622
	struct drm_i915_private *dev_priv = dev->dev_private;
1191
	unsigned long flags;
1623
	unsigned long flags;
1192
 
1624
 
1193
	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1625
	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1194
		return false;
1626
		return false;
1195
 
1627
 
1196
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1628
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1197
	if (ring->irq_refcount++ == 0) {
1629
	if (ring->irq_refcount++ == 0) {
1198
		I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
1630
		I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
1199
		POSTING_READ(RING_IMR(ring->mmio_base));
1631
		POSTING_READ(RING_IMR(ring->mmio_base));
1200
	}
1632
	}
1201
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1633
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1202
 
1634
 
1203
	return true;
1635
	return true;
1204
}
1636
}
1205
 
1637
 
1206
static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
1638
static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
1207
{
1639
{
1208
	struct drm_device *dev = ring->dev;
1640
	struct drm_device *dev = ring->dev;
1209
	struct drm_i915_private *dev_priv = dev->dev_private;
1641
	struct drm_i915_private *dev_priv = dev->dev_private;
1210
	unsigned long flags;
1642
	unsigned long flags;
1211
 
1643
 
1212
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1644
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1213
	if (--ring->irq_refcount == 0) {
1645
	if (--ring->irq_refcount == 0) {
1214
		I915_WRITE_IMR(ring, ~ring->irq_keep_mask);
1646
		I915_WRITE_IMR(ring, ~ring->irq_keep_mask);
1215
		POSTING_READ(RING_IMR(ring->mmio_base));
1647
		POSTING_READ(RING_IMR(ring->mmio_base));
1216
	}
1648
	}
1217
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1649
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1218
}
1650
}
1219
 
1651
 
1220
static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
1652
static int gen8_emit_flush(struct drm_i915_gem_request *request,
1221
			   u32 invalidate_domains,
1653
			   u32 invalidate_domains,
1222
			   u32 unused)
1654
			   u32 unused)
1223
{
1655
{
-
 
1656
	struct intel_ringbuffer *ringbuf = request->ringbuf;
1224
	struct intel_engine_cs *ring = ringbuf->ring;
1657
	struct intel_engine_cs *ring = ringbuf->ring;
1225
	struct drm_device *dev = ring->dev;
1658
	struct drm_device *dev = ring->dev;
1226
	struct drm_i915_private *dev_priv = dev->dev_private;
1659
	struct drm_i915_private *dev_priv = dev->dev_private;
1227
	uint32_t cmd;
1660
	uint32_t cmd;
1228
	int ret;
1661
	int ret;
1229
 
1662
 
1230
	ret = intel_logical_ring_begin(ringbuf, 4);
1663
	ret = intel_logical_ring_begin(request, 4);
1231
	if (ret)
1664
	if (ret)
1232
		return ret;
1665
		return ret;
1233
 
1666
 
1234
	cmd = MI_FLUSH_DW + 1;
1667
	cmd = MI_FLUSH_DW + 1;
1235
 
1668
 
1236
	if (ring == &dev_priv->ring[VCS]) {
1669
	/* We always require a command barrier so that subsequent
1237
		if (invalidate_domains & I915_GEM_GPU_DOMAINS)
1670
	 * commands, such as breadcrumb interrupts, are strictly ordered
1238
			cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
1671
	 * wrt the contents of the write cache being flushed to memory
-
 
1672
	 * (and thus being coherent from the CPU).
1239
				MI_FLUSH_DW_STORE_INDEX |
1673
	 */
1240
				MI_FLUSH_DW_OP_STOREDW;
1674
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1241
	} else {
1675
 
1242
		if (invalidate_domains & I915_GEM_DOMAIN_RENDER)
1676
	if (invalidate_domains & I915_GEM_GPU_DOMAINS) {
-
 
1677
		cmd |= MI_INVALIDATE_TLB;
1243
			cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
1678
		if (ring == &dev_priv->ring[VCS])
1244
				MI_FLUSH_DW_OP_STOREDW;
1679
			cmd |= MI_INVALIDATE_BSD;
1245
	}
1680
	}
1246
 
1681
 
1247
	intel_logical_ring_emit(ringbuf, cmd);
1682
	intel_logical_ring_emit(ringbuf, cmd);
1248
	intel_logical_ring_emit(ringbuf,
1683
	intel_logical_ring_emit(ringbuf,
1249
				I915_GEM_HWS_SCRATCH_ADDR |
1684
				I915_GEM_HWS_SCRATCH_ADDR |
1250
				MI_FLUSH_DW_USE_GTT);
1685
				MI_FLUSH_DW_USE_GTT);
1251
	intel_logical_ring_emit(ringbuf, 0); /* upper addr */
1686
	intel_logical_ring_emit(ringbuf, 0); /* upper addr */
1252
	intel_logical_ring_emit(ringbuf, 0); /* value */
1687
	intel_logical_ring_emit(ringbuf, 0); /* value */
1253
	intel_logical_ring_advance(ringbuf);
1688
	intel_logical_ring_advance(ringbuf);
1254
 
1689
 
1255
	return 0;
1690
	return 0;
1256
}
1691
}
1257
 
1692
 
1258
static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
1693
static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
1259
				  u32 invalidate_domains,
1694
				  u32 invalidate_domains,
1260
				  u32 flush_domains)
1695
				  u32 flush_domains)
1261
{
1696
{
-
 
1697
	struct intel_ringbuffer *ringbuf = request->ringbuf;
1262
	struct intel_engine_cs *ring = ringbuf->ring;
1698
	struct intel_engine_cs *ring = ringbuf->ring;
1263
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
1699
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
-
 
1700
	bool vf_flush_wa;
1264
	u32 flags = 0;
1701
	u32 flags = 0;
1265
	int ret;
1702
	int ret;
1266
 
1703
 
1267
	flags |= PIPE_CONTROL_CS_STALL;
1704
	flags |= PIPE_CONTROL_CS_STALL;
1268
 
1705
 
1269
	if (flush_domains) {
1706
	if (flush_domains) {
1270
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
1707
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
1271
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
1708
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-
 
1709
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
1272
	}
1710
	}
1273
 
1711
 
1274
	if (invalidate_domains) {
1712
	if (invalidate_domains) {
1275
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
1713
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
1276
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
1714
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
1277
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
1715
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
1278
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
1716
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
1279
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
1717
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
1280
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
1718
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
1281
		flags |= PIPE_CONTROL_QW_WRITE;
1719
		flags |= PIPE_CONTROL_QW_WRITE;
1282
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
1720
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
1283
	}
1721
	}
-
 
1722
 
-
 
1723
	/*
-
 
1724
	 * On GEN9+ Before VF_CACHE_INVALIDATE we need to emit a NULL pipe
-
 
1725
	 * control.
-
 
1726
	 */
-
 
1727
	vf_flush_wa = INTEL_INFO(ring->dev)->gen >= 9 &&
-
 
1728
		      flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
1284
 
1729
 
1285
	ret = intel_logical_ring_begin(ringbuf, 6);
1730
	ret = intel_logical_ring_begin(request, vf_flush_wa ? 12 : 6);
1286
	if (ret)
1731
	if (ret)
1287
		return ret;
1732
		return ret;
-
 
1733
 
-
 
1734
	if (vf_flush_wa) {
-
 
1735
		intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
-
 
1736
		intel_logical_ring_emit(ringbuf, 0);
-
 
1737
		intel_logical_ring_emit(ringbuf, 0);
-
 
1738
		intel_logical_ring_emit(ringbuf, 0);
-
 
1739
		intel_logical_ring_emit(ringbuf, 0);
-
 
1740
		intel_logical_ring_emit(ringbuf, 0);
-
 
1741
	}
1288
 
1742
 
1289
	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
1743
	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
1290
	intel_logical_ring_emit(ringbuf, flags);
1744
	intel_logical_ring_emit(ringbuf, flags);
1291
	intel_logical_ring_emit(ringbuf, scratch_addr);
1745
	intel_logical_ring_emit(ringbuf, scratch_addr);
1292
	intel_logical_ring_emit(ringbuf, 0);
1746
	intel_logical_ring_emit(ringbuf, 0);
1293
	intel_logical_ring_emit(ringbuf, 0);
1747
	intel_logical_ring_emit(ringbuf, 0);
1294
	intel_logical_ring_emit(ringbuf, 0);
1748
	intel_logical_ring_emit(ringbuf, 0);
1295
	intel_logical_ring_advance(ringbuf);
1749
	intel_logical_ring_advance(ringbuf);
1296
 
1750
 
1297
	return 0;
1751
	return 0;
1298
}
1752
}
1299
 
1753
 
1300
static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
1754
static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
1301
{
1755
{
1302
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
1756
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
1303
}
1757
}
1304
 
1758
 
1305
static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
1759
static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
1306
{
1760
{
1307
	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
1761
	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
1308
}
1762
}
-
 
1763
 
-
 
1764
static u32 bxt_a_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
-
 
1765
{
-
 
1766
 
-
 
1767
	/*
-
 
1768
	 * On BXT A steppings there is a HW coherency issue whereby the
-
 
1769
	 * MI_STORE_DATA_IMM storing the completed request's seqno
-
 
1770
	 * occasionally doesn't invalidate the CPU cache. Work around this by
-
 
1771
	 * clflushing the corresponding cacheline whenever the caller wants
-
 
1772
	 * the coherency to be guaranteed. Note that this cacheline is known
-
 
1773
	 * to be clean at this point, since we only write it in
-
 
1774
	 * bxt_a_set_seqno(), where we also do a clflush after the write. So
-
 
1775
	 * this clflush in practice becomes an invalidate operation.
-
 
1776
	 */
-
 
1777
 
-
 
1778
	if (!lazy_coherency)
-
 
1779
		intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
-
 
1780
 
-
 
1781
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
-
 
1782
}
-
 
1783
 
-
 
1784
static void bxt_a_set_seqno(struct intel_engine_cs *ring, u32 seqno)
-
 
1785
{
-
 
1786
	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
-
 
1787
 
-
 
1788
	/* See bxt_a_get_seqno() explaining the reason for the clflush. */
-
 
1789
	intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
-
 
1790
}
1309
 
1791
 
1310
static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
1792
static int gen8_emit_request(struct drm_i915_gem_request *request)
-
 
1793
{
1311
{
1794
	struct intel_ringbuffer *ringbuf = request->ringbuf;
1312
	struct intel_engine_cs *ring = ringbuf->ring;
1795
	struct intel_engine_cs *ring = ringbuf->ring;
1313
	u32 cmd;
1796
	u32 cmd;
1314
	int ret;
1797
	int ret;
-
 
1798
 
-
 
1799
	/*
-
 
1800
	 * Reserve space for 2 NOOPs at the end of each request to be
-
 
1801
	 * used as a workaround for not being allowed to do lite
-
 
1802
	 * restore with HEAD==TAIL (WaIdleLiteRestore).
1315
 
1803
	 */
1316
	ret = intel_logical_ring_begin(ringbuf, 6);
1804
	ret = intel_logical_ring_begin(request, 8);
1317
	if (ret)
1805
	if (ret)
1318
		return ret;
1806
		return ret;
1319
 
1807
 
1320
	cmd = MI_STORE_DWORD_IMM_GEN8;
1808
	cmd = MI_STORE_DWORD_IMM_GEN4;
1321
	cmd |= MI_GLOBAL_GTT;
1809
	cmd |= MI_GLOBAL_GTT;
1322
 
1810
 
1323
	intel_logical_ring_emit(ringbuf, cmd);
1811
	intel_logical_ring_emit(ringbuf, cmd);
1324
	intel_logical_ring_emit(ringbuf,
1812
	intel_logical_ring_emit(ringbuf,
1325
				(ring->status_page.gfx_addr +
1813
				(ring->status_page.gfx_addr +
1326
				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
1814
				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
1327
	intel_logical_ring_emit(ringbuf, 0);
1815
	intel_logical_ring_emit(ringbuf, 0);
1328
	intel_logical_ring_emit(ringbuf, ring->outstanding_lazy_seqno);
1816
	intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
1329
	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
1817
	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
1330
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1818
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1331
	intel_logical_ring_advance_and_submit(ringbuf);
1819
	intel_logical_ring_advance_and_submit(request);
-
 
1820
 
-
 
1821
	/*
-
 
1822
	 * Here we add two extra NOOPs as padding to avoid
-
 
1823
	 * lite restore of a context with HEAD==TAIL.
-
 
1824
	 */
-
 
1825
	intel_logical_ring_emit(ringbuf, MI_NOOP);
-
 
1826
	intel_logical_ring_emit(ringbuf, MI_NOOP);
-
 
1827
	intel_logical_ring_advance(ringbuf);
-
 
1828
 
-
 
1829
	return 0;
-
 
1830
}
-
 
1831
 
-
 
1832
static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
-
 
1833
{
-
 
1834
	struct render_state so;
-
 
1835
	int ret;
-
 
1836
 
-
 
1837
	ret = i915_gem_render_state_prepare(req->ring, &so);
-
 
1838
	if (ret)
-
 
1839
		return ret;
-
 
1840
 
1332
 
1841
	if (so.rodata == NULL)
-
 
1842
		return 0;
-
 
1843
 
-
 
1844
	ret = req->ring->emit_bb_start(req, so.ggtt_offset,
-
 
1845
				       I915_DISPATCH_SECURE);
-
 
1846
	if (ret)
-
 
1847
		goto out;
-
 
1848
 
-
 
1849
	ret = req->ring->emit_bb_start(req,
-
 
1850
				       (so.ggtt_offset + so.aux_batch_offset),
-
 
1851
				       I915_DISPATCH_SECURE);
-
 
1852
	if (ret)
-
 
1853
		goto out;
-
 
1854
 
-
 
1855
	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
-
 
1856
 
-
 
1857
out:
-
 
1858
	i915_gem_render_state_fini(&so);
-
 
1859
	return ret;
-
 
1860
}
-
 
1861
 
-
 
1862
static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
-
 
1863
{
-
 
1864
	int ret;
-
 
1865
 
-
 
1866
	ret = intel_logical_ring_workarounds_emit(req);
-
 
1867
	if (ret)
-
 
1868
		return ret;
-
 
1869
 
-
 
1870
	ret = intel_rcs_context_init_mocs(req);
-
 
1871
	/*
-
 
1872
	 * Failing to program the MOCS is non-fatal.The system will not
-
 
1873
	 * run at peak performance. So generate an error and carry on.
-
 
1874
	 */
-
 
1875
	if (ret)
-
 
1876
		DRM_ERROR("MOCS failed to program: expect performance issues.\n");
-
 
1877
 
1333
	return 0;
1878
	return intel_lr_context_render_state_init(req);
1334
}
1879
}
1335
 
1880
 
1336
/**
1881
/**
1337
 * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
1882
 * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
1338
 *
1883
 *
1339
 * @ring: Engine Command Streamer.
1884
 * @ring: Engine Command Streamer.
1340
 *
1885
 *
1341
 */
1886
 */
1342
void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
1887
void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
1343
{
1888
{
1344
	struct drm_i915_private *dev_priv;
1889
	struct drm_i915_private *dev_priv;
1345
 
1890
 
1346
	if (!intel_ring_initialized(ring))
1891
	if (!intel_ring_initialized(ring))
1347
		return;
1892
		return;
1348
 
1893
 
1349
	dev_priv = ring->dev->dev_private;
1894
	dev_priv = ring->dev->dev_private;
1350
 
1895
 
1351
	intel_logical_ring_stop(ring);
1896
	intel_logical_ring_stop(ring);
1352
	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
1897
	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
1353
	ring->preallocated_lazy_request = NULL;
-
 
1354
	ring->outstanding_lazy_seqno = 0;
-
 
1355
 
1898
 
1356
	if (ring->cleanup)
1899
	if (ring->cleanup)
1357
		ring->cleanup(ring);
1900
		ring->cleanup(ring);
1358
 
1901
 
1359
	i915_cmd_parser_fini_ring(ring);
1902
	i915_cmd_parser_fini_ring(ring);
-
 
1903
	i915_gem_batch_pool_fini(&ring->batch_pool);
1360
 
1904
 
1361
	if (ring->status_page.obj) {
1905
	if (ring->status_page.obj) {
1362
		kunmap(sg_page(ring->status_page.obj->pages->sgl));
1906
		kunmap(sg_page(ring->status_page.obj->pages->sgl));
1363
		ring->status_page.obj = NULL;
1907
		ring->status_page.obj = NULL;
1364
	}
1908
	}
-
 
1909
 
-
 
1910
	lrc_destroy_wa_ctx_obj(ring);
1365
}
1911
}
1366
 
1912
 
1367
static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
1913
static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
1368
{
1914
{
1369
	int ret;
1915
	int ret;
1370
 
1916
 
1371
	/* Intentionally left blank. */
1917
	/* Intentionally left blank. */
1372
	ring->buffer = NULL;
1918
	ring->buffer = NULL;
1373
 
1919
 
1374
	ring->dev = dev;
1920
	ring->dev = dev;
1375
	INIT_LIST_HEAD(&ring->active_list);
1921
	INIT_LIST_HEAD(&ring->active_list);
1376
	INIT_LIST_HEAD(&ring->request_list);
1922
	INIT_LIST_HEAD(&ring->request_list);
-
 
1923
	i915_gem_batch_pool_init(dev, &ring->batch_pool);
1377
	init_waitqueue_head(&ring->irq_queue);
1924
	init_waitqueue_head(&ring->irq_queue);
1378
 
1925
 
1379
	INIT_LIST_HEAD(&ring->execlist_queue);
1926
	INIT_LIST_HEAD(&ring->execlist_queue);
1380
	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
1927
	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
1381
	spin_lock_init(&ring->execlist_lock);
1928
	spin_lock_init(&ring->execlist_lock);
1382
	ring->next_context_status_buffer = 0;
-
 
1383
 
1929
 
1384
	ret = i915_cmd_parser_init_ring(ring);
1930
	ret = i915_cmd_parser_init_ring(ring);
1385
	if (ret)
1931
	if (ret)
1386
		return ret;
1932
		return ret;
1387
 
-
 
1388
	if (ring->init) {
1933
 
1389
		ret = ring->init(ring);
1934
	ret = intel_lr_context_deferred_alloc(ring->default_context, ring);
1390
		if (ret)
1935
	if (ret)
1391
			return ret;
-
 
-
 
1936
		return ret;
-
 
1937
 
-
 
1938
	/* As this is the default context, always pin it */
-
 
1939
	ret = intel_lr_context_do_pin(
1392
	}
1940
			ring,
-
 
1941
			ring->default_context->engine[ring->id].state,
-
 
1942
			ring->default_context->engine[ring->id].ringbuf);
-
 
1943
	if (ret) {
-
 
1944
		DRM_ERROR(
-
 
1945
			"Failed to pin and map ringbuffer %s: %d\n",
-
 
1946
			ring->name, ret);
1393
 
1947
		return ret;
1394
	ret = intel_lr_context_deferred_create(ring->default_context, ring);
1948
	}
1395
 
1949
 
1396
	return ret;
1950
	return ret;
1397
}
1951
}
1398
 
1952
 
1399
static int logical_render_ring_init(struct drm_device *dev)
1953
static int logical_render_ring_init(struct drm_device *dev)
1400
{
1954
{
1401
	struct drm_i915_private *dev_priv = dev->dev_private;
1955
	struct drm_i915_private *dev_priv = dev->dev_private;
1402
	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
1956
	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
-
 
1957
	int ret;
1403
 
1958
 
1404
	ring->name = "render ring";
1959
	ring->name = "render ring";
1405
	ring->id = RCS;
1960
	ring->id = RCS;
1406
	ring->mmio_base = RENDER_RING_BASE;
1961
	ring->mmio_base = RENDER_RING_BASE;
1407
	ring->irq_enable_mask =
1962
	ring->irq_enable_mask =
1408
		GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
1963
		GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
1409
	ring->irq_keep_mask =
1964
	ring->irq_keep_mask =
1410
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
1965
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
1411
	if (HAS_L3_DPF(dev))
1966
	if (HAS_L3_DPF(dev))
1412
		ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
1967
		ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
-
 
1968
 
-
 
1969
	if (INTEL_INFO(dev)->gen >= 9)
-
 
1970
		ring->init_hw = gen9_init_render_ring;
1413
 
1971
	else
1414
	ring->init = gen8_init_render_ring;
1972
		ring->init_hw = gen8_init_render_ring;
1415
	ring->init_context = intel_logical_ring_workarounds_emit;
1973
	ring->init_context = gen8_init_rcs_context;
-
 
1974
	ring->cleanup = intel_fini_pipe_control;
-
 
1975
	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
-
 
1976
		ring->get_seqno = bxt_a_get_seqno;
-
 
1977
		ring->set_seqno = bxt_a_set_seqno;
1416
	ring->cleanup = intel_fini_pipe_control;
1978
	} else {
1417
	ring->get_seqno = gen8_get_seqno;
1979
		ring->get_seqno = gen8_get_seqno;
-
 
1980
		ring->set_seqno = gen8_set_seqno;
1418
	ring->set_seqno = gen8_set_seqno;
1981
	}
1419
	ring->emit_request = gen8_emit_request;
1982
	ring->emit_request = gen8_emit_request;
1420
	ring->emit_flush = gen8_emit_flush_render;
1983
	ring->emit_flush = gen8_emit_flush_render;
1421
	ring->irq_get = gen8_logical_ring_get_irq;
1984
	ring->irq_get = gen8_logical_ring_get_irq;
1422
	ring->irq_put = gen8_logical_ring_put_irq;
1985
	ring->irq_put = gen8_logical_ring_put_irq;
1423
	ring->emit_bb_start = gen8_emit_bb_start;
1986
	ring->emit_bb_start = gen8_emit_bb_start;
-
 
1987
 
-
 
1988
	ring->dev = dev;
-
 
1989
 
-
 
1990
	ret = intel_init_pipe_control(ring);
-
 
1991
	if (ret)
-
 
1992
		return ret;
-
 
1993
 
-
 
1994
	ret = intel_init_workaround_bb(ring);
-
 
1995
	if (ret) {
-
 
1996
		/*
-
 
1997
		 * We continue even if we fail to initialize WA batch
-
 
1998
		 * because we only expect rare glitches but nothing
-
 
1999
		 * critical to prevent us from using GPU
-
 
2000
		 */
-
 
2001
		DRM_ERROR("WA batch buffer initialization failed: %d\n",
-
 
2002
			  ret);
-
 
2003
	}
1424
 
2004
 
-
 
2005
	ret = logical_ring_init(dev, ring);
-
 
2006
	if (ret) {
-
 
2007
		lrc_destroy_wa_ctx_obj(ring);
-
 
2008
	}
-
 
2009
 
1425
	return logical_ring_init(dev, ring);
2010
	return ret;
1426
}
2011
}
1427
 
2012
 
1428
static int logical_bsd_ring_init(struct drm_device *dev)
2013
static int logical_bsd_ring_init(struct drm_device *dev)
1429
{
2014
{
1430
	struct drm_i915_private *dev_priv = dev->dev_private;
2015
	struct drm_i915_private *dev_priv = dev->dev_private;
1431
	struct intel_engine_cs *ring = &dev_priv->ring[VCS];
2016
	struct intel_engine_cs *ring = &dev_priv->ring[VCS];
1432
 
2017
 
1433
	ring->name = "bsd ring";
2018
	ring->name = "bsd ring";
1434
	ring->id = VCS;
2019
	ring->id = VCS;
1435
	ring->mmio_base = GEN6_BSD_RING_BASE;
2020
	ring->mmio_base = GEN6_BSD_RING_BASE;
1436
	ring->irq_enable_mask =
2021
	ring->irq_enable_mask =
1437
		GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2022
		GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
1438
	ring->irq_keep_mask =
2023
	ring->irq_keep_mask =
1439
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2024
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
1440
 
2025
 
-
 
2026
	ring->init_hw = gen8_init_common_ring;
-
 
2027
	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
-
 
2028
		ring->get_seqno = bxt_a_get_seqno;
-
 
2029
		ring->set_seqno = bxt_a_set_seqno;
1441
	ring->init = gen8_init_common_ring;
2030
	} else {
1442
	ring->get_seqno = gen8_get_seqno;
2031
		ring->get_seqno = gen8_get_seqno;
-
 
2032
		ring->set_seqno = gen8_set_seqno;
1443
	ring->set_seqno = gen8_set_seqno;
2033
	}
1444
	ring->emit_request = gen8_emit_request;
2034
	ring->emit_request = gen8_emit_request;
1445
	ring->emit_flush = gen8_emit_flush;
2035
	ring->emit_flush = gen8_emit_flush;
1446
	ring->irq_get = gen8_logical_ring_get_irq;
2036
	ring->irq_get = gen8_logical_ring_get_irq;
1447
	ring->irq_put = gen8_logical_ring_put_irq;
2037
	ring->irq_put = gen8_logical_ring_put_irq;
1448
	ring->emit_bb_start = gen8_emit_bb_start;
2038
	ring->emit_bb_start = gen8_emit_bb_start;
1449
 
2039
 
1450
	return logical_ring_init(dev, ring);
2040
	return logical_ring_init(dev, ring);
1451
}
2041
}
1452
 
2042
 
1453
static int logical_bsd2_ring_init(struct drm_device *dev)
2043
static int logical_bsd2_ring_init(struct drm_device *dev)
1454
{
2044
{
1455
	struct drm_i915_private *dev_priv = dev->dev_private;
2045
	struct drm_i915_private *dev_priv = dev->dev_private;
1456
	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
2046
	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
1457
 
2047
 
1458
	ring->name = "bds2 ring";
2048
	ring->name = "bds2 ring";
1459
	ring->id = VCS2;
2049
	ring->id = VCS2;
1460
	ring->mmio_base = GEN8_BSD2_RING_BASE;
2050
	ring->mmio_base = GEN8_BSD2_RING_BASE;
1461
	ring->irq_enable_mask =
2051
	ring->irq_enable_mask =
1462
		GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2052
		GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
1463
	ring->irq_keep_mask =
2053
	ring->irq_keep_mask =
1464
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2054
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
1465
 
2055
 
1466
	ring->init = gen8_init_common_ring;
2056
	ring->init_hw = gen8_init_common_ring;
1467
	ring->get_seqno = gen8_get_seqno;
2057
	ring->get_seqno = gen8_get_seqno;
1468
	ring->set_seqno = gen8_set_seqno;
2058
	ring->set_seqno = gen8_set_seqno;
1469
	ring->emit_request = gen8_emit_request;
2059
	ring->emit_request = gen8_emit_request;
1470
	ring->emit_flush = gen8_emit_flush;
2060
	ring->emit_flush = gen8_emit_flush;
1471
	ring->irq_get = gen8_logical_ring_get_irq;
2061
	ring->irq_get = gen8_logical_ring_get_irq;
1472
	ring->irq_put = gen8_logical_ring_put_irq;
2062
	ring->irq_put = gen8_logical_ring_put_irq;
1473
	ring->emit_bb_start = gen8_emit_bb_start;
2063
	ring->emit_bb_start = gen8_emit_bb_start;
1474
 
2064
 
1475
	return logical_ring_init(dev, ring);
2065
	return logical_ring_init(dev, ring);
1476
}
2066
}
1477
 
2067
 
1478
static int logical_blt_ring_init(struct drm_device *dev)
2068
static int logical_blt_ring_init(struct drm_device *dev)
1479
{
2069
{
1480
	struct drm_i915_private *dev_priv = dev->dev_private;
2070
	struct drm_i915_private *dev_priv = dev->dev_private;
1481
	struct intel_engine_cs *ring = &dev_priv->ring[BCS];
2071
	struct intel_engine_cs *ring = &dev_priv->ring[BCS];
1482
 
2072
 
1483
	ring->name = "blitter ring";
2073
	ring->name = "blitter ring";
1484
	ring->id = BCS;
2074
	ring->id = BCS;
1485
	ring->mmio_base = BLT_RING_BASE;
2075
	ring->mmio_base = BLT_RING_BASE;
1486
	ring->irq_enable_mask =
2076
	ring->irq_enable_mask =
1487
		GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2077
		GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
1488
	ring->irq_keep_mask =
2078
	ring->irq_keep_mask =
1489
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2079
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
1490
 
2080
 
-
 
2081
	ring->init_hw = gen8_init_common_ring;
-
 
2082
	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
-
 
2083
		ring->get_seqno = bxt_a_get_seqno;
-
 
2084
		ring->set_seqno = bxt_a_set_seqno;
1491
	ring->init = gen8_init_common_ring;
2085
	} else {
1492
	ring->get_seqno = gen8_get_seqno;
2086
		ring->get_seqno = gen8_get_seqno;
-
 
2087
		ring->set_seqno = gen8_set_seqno;
1493
	ring->set_seqno = gen8_set_seqno;
2088
	}
1494
	ring->emit_request = gen8_emit_request;
2089
	ring->emit_request = gen8_emit_request;
1495
	ring->emit_flush = gen8_emit_flush;
2090
	ring->emit_flush = gen8_emit_flush;
1496
	ring->irq_get = gen8_logical_ring_get_irq;
2091
	ring->irq_get = gen8_logical_ring_get_irq;
1497
	ring->irq_put = gen8_logical_ring_put_irq;
2092
	ring->irq_put = gen8_logical_ring_put_irq;
1498
	ring->emit_bb_start = gen8_emit_bb_start;
2093
	ring->emit_bb_start = gen8_emit_bb_start;
1499
 
2094
 
1500
	return logical_ring_init(dev, ring);
2095
	return logical_ring_init(dev, ring);
1501
}
2096
}
1502
 
2097
 
1503
static int logical_vebox_ring_init(struct drm_device *dev)
2098
static int logical_vebox_ring_init(struct drm_device *dev)
1504
{
2099
{
1505
	struct drm_i915_private *dev_priv = dev->dev_private;
2100
	struct drm_i915_private *dev_priv = dev->dev_private;
1506
	struct intel_engine_cs *ring = &dev_priv->ring[VECS];
2101
	struct intel_engine_cs *ring = &dev_priv->ring[VECS];
1507
 
2102
 
1508
	ring->name = "video enhancement ring";
2103
	ring->name = "video enhancement ring";
1509
	ring->id = VECS;
2104
	ring->id = VECS;
1510
	ring->mmio_base = VEBOX_RING_BASE;
2105
	ring->mmio_base = VEBOX_RING_BASE;
1511
	ring->irq_enable_mask =
2106
	ring->irq_enable_mask =
1512
		GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
2107
		GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
1513
	ring->irq_keep_mask =
2108
	ring->irq_keep_mask =
1514
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
2109
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
1515
 
2110
 
-
 
2111
	ring->init_hw = gen8_init_common_ring;
-
 
2112
	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
-
 
2113
		ring->get_seqno = bxt_a_get_seqno;
-
 
2114
		ring->set_seqno = bxt_a_set_seqno;
1516
	ring->init = gen8_init_common_ring;
2115
	} else {
1517
	ring->get_seqno = gen8_get_seqno;
2116
		ring->get_seqno = gen8_get_seqno;
-
 
2117
		ring->set_seqno = gen8_set_seqno;
1518
	ring->set_seqno = gen8_set_seqno;
2118
	}
1519
	ring->emit_request = gen8_emit_request;
2119
	ring->emit_request = gen8_emit_request;
1520
	ring->emit_flush = gen8_emit_flush;
2120
	ring->emit_flush = gen8_emit_flush;
1521
	ring->irq_get = gen8_logical_ring_get_irq;
2121
	ring->irq_get = gen8_logical_ring_get_irq;
1522
	ring->irq_put = gen8_logical_ring_put_irq;
2122
	ring->irq_put = gen8_logical_ring_put_irq;
1523
	ring->emit_bb_start = gen8_emit_bb_start;
2123
	ring->emit_bb_start = gen8_emit_bb_start;
1524
 
2124
 
1525
	return logical_ring_init(dev, ring);
2125
	return logical_ring_init(dev, ring);
1526
}
2126
}
1527
 
2127
 
1528
/**
2128
/**
1529
 * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers
2129
 * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers
1530
 * @dev: DRM device.
2130
 * @dev: DRM device.
1531
 *
2131
 *
1532
 * This function inits the engines for an Execlists submission style (the equivalent in the
2132
 * This function inits the engines for an Execlists submission style (the equivalent in the
1533
 * legacy ringbuffer submission world would be i915_gem_init_rings). It does it only for
2133
 * legacy ringbuffer submission world would be i915_gem_init_rings). It does it only for
1534
 * those engines that are present in the hardware.
2134
 * those engines that are present in the hardware.
1535
 *
2135
 *
1536
 * Return: non-zero if the initialization failed.
2136
 * Return: non-zero if the initialization failed.
1537
 */
2137
 */
1538
int intel_logical_rings_init(struct drm_device *dev)
2138
int intel_logical_rings_init(struct drm_device *dev)
1539
{
2139
{
1540
	struct drm_i915_private *dev_priv = dev->dev_private;
2140
	struct drm_i915_private *dev_priv = dev->dev_private;
1541
	int ret;
2141
	int ret;
1542
 
2142
 
1543
	ret = logical_render_ring_init(dev);
2143
	ret = logical_render_ring_init(dev);
1544
	if (ret)
2144
	if (ret)
1545
		return ret;
2145
		return ret;
1546
 
2146
 
1547
	if (HAS_BSD(dev)) {
2147
	if (HAS_BSD(dev)) {
1548
		ret = logical_bsd_ring_init(dev);
2148
		ret = logical_bsd_ring_init(dev);
1549
		if (ret)
2149
		if (ret)
1550
			goto cleanup_render_ring;
2150
			goto cleanup_render_ring;
1551
	}
2151
	}
1552
 
2152
 
1553
	if (HAS_BLT(dev)) {
2153
	if (HAS_BLT(dev)) {
1554
		ret = logical_blt_ring_init(dev);
2154
		ret = logical_blt_ring_init(dev);
1555
		if (ret)
2155
		if (ret)
1556
			goto cleanup_bsd_ring;
2156
			goto cleanup_bsd_ring;
1557
	}
2157
	}
1558
 
2158
 
1559
	if (HAS_VEBOX(dev)) {
2159
	if (HAS_VEBOX(dev)) {
1560
		ret = logical_vebox_ring_init(dev);
2160
		ret = logical_vebox_ring_init(dev);
1561
		if (ret)
2161
		if (ret)
1562
			goto cleanup_blt_ring;
2162
			goto cleanup_blt_ring;
1563
	}
2163
	}
1564
 
2164
 
1565
	if (HAS_BSD2(dev)) {
2165
	if (HAS_BSD2(dev)) {
1566
		ret = logical_bsd2_ring_init(dev);
2166
		ret = logical_bsd2_ring_init(dev);
1567
		if (ret)
2167
		if (ret)
1568
			goto cleanup_vebox_ring;
2168
			goto cleanup_vebox_ring;
1569
	}
2169
	}
1570
 
-
 
1571
	ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
-
 
1572
	if (ret)
-
 
1573
		goto cleanup_bsd2_ring;
-
 
1574
 
2170
 
1575
	return 0;
-
 
1576
 
-
 
1577
cleanup_bsd2_ring:
2171
	return 0;
1578
	intel_logical_ring_cleanup(&dev_priv->ring[VCS2]);
2172
 
1579
cleanup_vebox_ring:
2173
cleanup_vebox_ring:
1580
	intel_logical_ring_cleanup(&dev_priv->ring[VECS]);
2174
	intel_logical_ring_cleanup(&dev_priv->ring[VECS]);
1581
cleanup_blt_ring:
2175
cleanup_blt_ring:
1582
	intel_logical_ring_cleanup(&dev_priv->ring[BCS]);
2176
	intel_logical_ring_cleanup(&dev_priv->ring[BCS]);
1583
cleanup_bsd_ring:
2177
cleanup_bsd_ring:
1584
	intel_logical_ring_cleanup(&dev_priv->ring[VCS]);
2178
	intel_logical_ring_cleanup(&dev_priv->ring[VCS]);
1585
cleanup_render_ring:
2179
cleanup_render_ring:
1586
	intel_logical_ring_cleanup(&dev_priv->ring[RCS]);
2180
	intel_logical_ring_cleanup(&dev_priv->ring[RCS]);
1587
 
2181
 
1588
	return ret;
2182
	return ret;
1589
}
2183
}
1590
 
2184
 
1591
int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
2185
static u32
1592
				       struct intel_context *ctx)
2186
make_rpcs(struct drm_device *dev)
1593
{
-
 
1594
	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
-
 
1595
	struct render_state so;
-
 
1596
	struct drm_i915_file_private *file_priv = ctx->file_priv;
-
 
1597
	struct drm_file *file = file_priv ? file_priv->file : NULL;
2187
{
-
 
2188
	u32 rpcs = 0;
1598
	int ret;
2189
 
1599
 
-
 
1600
	ret = i915_gem_render_state_prepare(ring, &so);
2190
	/*
1601
	if (ret)
2191
	 * No explicit RPCS request is needed to ensure full
1602
		return ret;
2192
	 * slice/subslice/EU enablement prior to Gen9.
1603
 
2193
	*/
-
 
2194
	if (INTEL_INFO(dev)->gen < 9)
1604
	if (so.rodata == NULL)
2195
		return 0;
-
 
2196
 
-
 
2197
	/*
1605
		return 0;
2198
	 * Starting in Gen9, render power gating can leave
-
 
2199
	 * slice/subslice/EU in a partially enabled state. We
-
 
2200
	 * must make an explicit request through RPCS for full
-
 
2201
	 * enablement.
-
 
2202
	*/
1606
 
2203
	if (INTEL_INFO(dev)->has_slice_pg) {
1607
	ret = ring->emit_bb_start(ringbuf,
2204
		rpcs |= GEN8_RPCS_S_CNT_ENABLE;
1608
			so.ggtt_offset,
2205
		rpcs |= INTEL_INFO(dev)->slice_total <<
-
 
2206
			GEN8_RPCS_S_CNT_SHIFT;
-
 
2207
		rpcs |= GEN8_RPCS_ENABLE;
1609
			I915_DISPATCH_SECURE);
2208
	}
-
 
2209
 
-
 
2210
	if (INTEL_INFO(dev)->has_subslice_pg) {
-
 
2211
		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
-
 
2212
		rpcs |= INTEL_INFO(dev)->subslice_per_slice <<
1610
	if (ret)
2213
			GEN8_RPCS_SS_CNT_SHIFT;
-
 
2214
		rpcs |= GEN8_RPCS_ENABLE;
1611
		goto out;
2215
	}
-
 
2216
 
1612
 
2217
	if (INTEL_INFO(dev)->has_eu_pg) {
1613
	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
2218
		rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
1614
 
-
 
-
 
2219
			GEN8_RPCS_EU_MIN_SHIFT;
1615
	ret = __i915_add_request(ring, file, so.obj, NULL);
2220
		rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
1616
	/* intel_logical_ring_add_request moves object to inactive if it
2221
			GEN8_RPCS_EU_MAX_SHIFT;
1617
	 * fails */
2222
		rpcs |= GEN8_RPCS_ENABLE;
1618
out:
2223
	}
1619
	i915_gem_render_state_fini(&so);
2224
 
1620
	return ret;
2225
	return rpcs;
1621
}
2226
}
1622
 
2227
 
1623
static int
2228
static int
1624
populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
2229
populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
1625
		    struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf)
2230
		    struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf)
1626
{
2231
{
1627
	struct drm_device *dev = ring->dev;
2232
	struct drm_device *dev = ring->dev;
1628
	struct drm_i915_private *dev_priv = dev->dev_private;
2233
	struct drm_i915_private *dev_priv = dev->dev_private;
1629
	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
2234
	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
1630
	struct page *page;
2235
	struct page *page;
1631
	uint32_t *reg_state;
2236
	uint32_t *reg_state;
1632
	int ret;
2237
	int ret;
1633
 
2238
 
1634
	if (!ppgtt)
2239
	if (!ppgtt)
1635
		ppgtt = dev_priv->mm.aliasing_ppgtt;
2240
		ppgtt = dev_priv->mm.aliasing_ppgtt;
1636
 
2241
 
1637
	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
2242
	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
1638
	if (ret) {
2243
	if (ret) {
1639
		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
2244
		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
1640
		return ret;
2245
		return ret;
1641
	}
2246
	}
1642
 
2247
 
1643
	ret = i915_gem_object_get_pages(ctx_obj);
2248
	ret = i915_gem_object_get_pages(ctx_obj);
1644
	if (ret) {
2249
	if (ret) {
1645
		DRM_DEBUG_DRIVER("Could not get object pages\n");
2250
		DRM_DEBUG_DRIVER("Could not get object pages\n");
1646
		return ret;
2251
		return ret;
1647
	}
2252
	}
1648
 
2253
 
1649
	i915_gem_object_pin_pages(ctx_obj);
2254
	i915_gem_object_pin_pages(ctx_obj);
1650
 
2255
 
1651
	/* The second page of the context object contains some fields which must
2256
	/* The second page of the context object contains some fields which must
1652
	 * be set up prior to the first execution. */
2257
	 * be set up prior to the first execution. */
1653
	page = i915_gem_object_get_page(ctx_obj, 1);
2258
	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
1654
	reg_state = kmap_atomic(page);
2259
	reg_state = kmap_atomic(page);
1655
 
2260
 
1656
	/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
2261
	/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
1657
	 * commands followed by (reg, value) pairs. The values we are setting here are
2262
	 * commands followed by (reg, value) pairs. The values we are setting here are
1658
	 * only for the first context restore: on a subsequent save, the GPU will
2263
	 * only for the first context restore: on a subsequent save, the GPU will
1659
	 * recreate this batchbuffer with new values (including all the missing
2264
	 * recreate this batchbuffer with new values (including all the missing
1660
	 * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */
2265
	 * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */
1661
	if (ring->id == RCS)
2266
	if (ring->id == RCS)
1662
		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14);
2267
		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14);
1663
	else
2268
	else
1664
		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
2269
		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
1665
	reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
2270
	reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
1666
	reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
2271
	reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
1667
	reg_state[CTX_CONTEXT_CONTROL+1] =
2272
	reg_state[CTX_CONTEXT_CONTROL+1] =
1668
			_MASKED_BIT_ENABLE((1<<3) | MI_RESTORE_INHIBIT);
2273
		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
-
 
2274
				   CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-
 
2275
				   CTX_CTRL_RS_CTX_ENABLE);
1669
	reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
2276
	reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
1670
	reg_state[CTX_RING_HEAD+1] = 0;
2277
	reg_state[CTX_RING_HEAD+1] = 0;
1671
	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
2278
	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
1672
	reg_state[CTX_RING_TAIL+1] = 0;
2279
	reg_state[CTX_RING_TAIL+1] = 0;
1673
	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
2280
	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
1674
	/* Ring buffer start address is not known until the buffer is pinned.
2281
	/* Ring buffer start address is not known until the buffer is pinned.
1675
	 * It is written to the context image in execlists_update_context()
2282
	 * It is written to the context image in execlists_update_context()
1676
	 */
2283
	 */
1677
	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
2284
	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
1678
	reg_state[CTX_RING_BUFFER_CONTROL+1] =
2285
	reg_state[CTX_RING_BUFFER_CONTROL+1] =
1679
			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
2286
			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
1680
	reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
2287
	reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
1681
	reg_state[CTX_BB_HEAD_U+1] = 0;
2288
	reg_state[CTX_BB_HEAD_U+1] = 0;
1682
	reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
2289
	reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
1683
	reg_state[CTX_BB_HEAD_L+1] = 0;
2290
	reg_state[CTX_BB_HEAD_L+1] = 0;
1684
	reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
2291
	reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
1685
	reg_state[CTX_BB_STATE+1] = (1<<5);
2292
	reg_state[CTX_BB_STATE+1] = (1<<5);
1686
	reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
2293
	reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
1687
	reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
2294
	reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
1688
	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
2295
	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
1689
	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
2296
	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
1690
	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
2297
	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
1691
	reg_state[CTX_SECOND_BB_STATE+1] = 0;
2298
	reg_state[CTX_SECOND_BB_STATE+1] = 0;
1692
	if (ring->id == RCS) {
2299
	if (ring->id == RCS) {
1693
		/* TODO: according to BSpec, the register state context
-
 
1694
		 * for CHV does not have these. OTOH, these registers do
-
 
1695
		 * exist in CHV. I'm waiting for a clarification */
-
 
1696
		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
2300
		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
1697
		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
2301
		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
1698
		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
2302
		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
1699
		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
2303
		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
1700
		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
2304
		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
1701
		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
2305
		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
-
 
2306
		if (ring->wa_ctx.obj) {
-
 
2307
			struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
-
 
2308
			uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj);
-
 
2309
 
-
 
2310
			reg_state[CTX_RCS_INDIRECT_CTX+1] =
-
 
2311
				(ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) |
-
 
2312
				(wa_ctx->indirect_ctx.size / CACHELINE_DWORDS);
-
 
2313
 
-
 
2314
			reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] =
-
 
2315
				CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT << 6;
-
 
2316
 
-
 
2317
			reg_state[CTX_BB_PER_CTX_PTR+1] =
-
 
2318
				(ggtt_offset + wa_ctx->per_ctx.offset * sizeof(uint32_t)) |
-
 
2319
				0x01;
-
 
2320
		}
1702
	}
2321
	}
1703
	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
2322
	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
1704
	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
2323
	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
1705
	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
2324
	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
1706
	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
2325
	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
1707
	reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
2326
	reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
1708
	reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
2327
	reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
1709
	reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
2328
	reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
1710
	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
2329
	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
1711
	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
2330
	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
1712
	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
2331
	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
1713
	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
2332
	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
1714
	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
2333
	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
-
 
2334
 
1715
	reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[3]);
2335
	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
-
 
2336
		/* 64b PPGTT (48bit canonical)
-
 
2337
		 * PDP0_DESCRIPTOR contains the base address to PML4 and
-
 
2338
		 * other PDP Descriptors are ignored.
-
 
2339
		 */
-
 
2340
		ASSIGN_CTX_PML4(ppgtt, reg_state);
-
 
2341
	} else {
-
 
2342
		/* 32b PPGTT
1716
	reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[3]);
2343
		 * PDP*_DESCRIPTOR contains the base address of space supported.
1717
	reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[2]);
2344
		 * With dynamic page allocation, PDPs may not be allocated at
1718
	reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[2]);
2345
		 * this point. Point the unallocated PDPs to the scratch page
-
 
2346
		 */
1719
	reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[1]);
2347
		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
1720
	reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[1]);
2348
		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
1721
	reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[0]);
2349
		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
1722
	reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[0]);
2350
		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
-
 
2351
	}
-
 
2352
 
1723
	if (ring->id == RCS) {
2353
	if (ring->id == RCS) {
1724
		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
2354
		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
1725
		reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8;
2355
		reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
1726
		reg_state[CTX_R_PWR_CLK_STATE+1] = 0;
2356
		reg_state[CTX_R_PWR_CLK_STATE+1] = make_rpcs(dev);
1727
	}
2357
	}
1728
 
2358
 
1729
	kunmap_atomic(reg_state);
2359
	kunmap_atomic(reg_state);
1730
 
2360
 
1731
	ctx_obj->dirty = 1;
2361
	ctx_obj->dirty = 1;
1732
//   set_page_dirty(page);
-
 
1733
	i915_gem_object_unpin_pages(ctx_obj);
2362
    i915_gem_object_unpin_pages(ctx_obj);
1734
 
2363
 
1735
	return 0;
2364
	return 0;
1736
}
2365
}
1737
 
2366
 
1738
/**
2367
/**
1739
 * intel_lr_context_free() - free the LRC specific bits of a context
2368
 * intel_lr_context_free() - free the LRC specific bits of a context
1740
 * @ctx: the LR context to free.
2369
 * @ctx: the LR context to free.
1741
 *
2370
 *
1742
 * The real context freeing is done in i915_gem_context_free: this only
2371
 * The real context freeing is done in i915_gem_context_free: this only
1743
 * takes care of the bits that are LRC related: the per-engine backing
2372
 * takes care of the bits that are LRC related: the per-engine backing
1744
 * objects and the logical ringbuffer.
2373
 * objects and the logical ringbuffer.
1745
 */
2374
 */
1746
void intel_lr_context_free(struct intel_context *ctx)
2375
void intel_lr_context_free(struct intel_context *ctx)
1747
{
2376
{
1748
	int i;
2377
	int i;
1749
 
2378
 
1750
	for (i = 0; i < I915_NUM_RINGS; i++) {
2379
	for (i = 0; i < I915_NUM_RINGS; i++) {
1751
		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
2380
		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
1752
 
2381
 
1753
		if (ctx_obj) {
2382
		if (ctx_obj) {
1754
			struct intel_ringbuffer *ringbuf =
2383
			struct intel_ringbuffer *ringbuf =
1755
					ctx->engine[i].ringbuf;
2384
					ctx->engine[i].ringbuf;
1756
			struct intel_engine_cs *ring = ringbuf->ring;
2385
			struct intel_engine_cs *ring = ringbuf->ring;
1757
 
2386
 
1758
			if (ctx == ring->default_context) {
2387
			if (ctx == ring->default_context) {
1759
				intel_unpin_ringbuffer_obj(ringbuf);
2388
				intel_unpin_ringbuffer_obj(ringbuf);
1760
				i915_gem_object_ggtt_unpin(ctx_obj);
2389
				i915_gem_object_ggtt_unpin(ctx_obj);
1761
			}
2390
			}
1762
			intel_destroy_ringbuffer_obj(ringbuf);
2391
			WARN_ON(ctx->engine[ring->id].pin_count);
1763
			kfree(ringbuf);
2392
			intel_ringbuffer_free(ringbuf);
1764
			drm_gem_object_unreference(&ctx_obj->base);
2393
			drm_gem_object_unreference(&ctx_obj->base);
1765
		}
2394
		}
1766
	}
2395
	}
1767
}
2396
}
1768
 
2397
 
1769
static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
2398
static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
1770
{
2399
{
1771
	int ret = 0;
2400
	int ret = 0;
1772
 
2401
 
1773
	WARN_ON(INTEL_INFO(ring->dev)->gen < 8);
2402
	WARN_ON(INTEL_INFO(ring->dev)->gen < 8);
1774
 
2403
 
1775
	switch (ring->id) {
2404
	switch (ring->id) {
1776
	case RCS:
2405
	case RCS:
1777
		if (INTEL_INFO(ring->dev)->gen >= 9)
2406
		if (INTEL_INFO(ring->dev)->gen >= 9)
1778
			ret = GEN9_LR_CONTEXT_RENDER_SIZE;
2407
			ret = GEN9_LR_CONTEXT_RENDER_SIZE;
1779
		else
2408
		else
1780
			ret = GEN8_LR_CONTEXT_RENDER_SIZE;
2409
			ret = GEN8_LR_CONTEXT_RENDER_SIZE;
1781
		break;
2410
		break;
1782
	case VCS:
2411
	case VCS:
1783
	case BCS:
2412
	case BCS:
1784
	case VECS:
2413
	case VECS:
1785
	case VCS2:
2414
	case VCS2:
1786
		ret = GEN8_LR_CONTEXT_OTHER_SIZE;
2415
		ret = GEN8_LR_CONTEXT_OTHER_SIZE;
1787
		break;
2416
		break;
1788
	}
2417
	}
1789
 
2418
 
1790
	return ret;
2419
	return ret;
1791
}
2420
}
1792
 
2421
 
1793
static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
2422
static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
1794
		struct drm_i915_gem_object *default_ctx_obj)
2423
		struct drm_i915_gem_object *default_ctx_obj)
1795
{
2424
{
1796
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
2425
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-
 
2426
	struct page *page;
1797
 
2427
 
1798
	/* The status page is offset 0 from the default context object
-
 
1799
	 * in LRC mode. */
2428
	/* The HWSP is part of the default context object in LRC mode. */
1800
	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj);
2429
	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj)
1801
	ring->status_page.page_addr =
2430
			+ LRC_PPHWSP_PN * PAGE_SIZE;
-
 
2431
	page = i915_gem_object_get_page(default_ctx_obj, LRC_PPHWSP_PN);
1802
			kmap(sg_page(default_ctx_obj->pages->sgl));
2432
	ring->status_page.page_addr = kmap(page);
1803
	ring->status_page.obj = default_ctx_obj;
2433
	ring->status_page.obj = default_ctx_obj;
1804
 
2434
 
1805
	I915_WRITE(RING_HWS_PGA(ring->mmio_base),
2435
	I915_WRITE(RING_HWS_PGA(ring->mmio_base),
1806
			(u32)ring->status_page.gfx_addr);
2436
			(u32)ring->status_page.gfx_addr);
1807
	POSTING_READ(RING_HWS_PGA(ring->mmio_base));
2437
	POSTING_READ(RING_HWS_PGA(ring->mmio_base));
1808
}
2438
}
1809
 
2439
 
1810
/**
2440
/**
1811
 * intel_lr_context_deferred_create() - create the LRC specific bits of a context
2441
 * intel_lr_context_deferred_alloc() - create the LRC specific bits of a context
1812
 * @ctx: LR context to create.
2442
 * @ctx: LR context to create.
1813
 * @ring: engine to be used with the context.
2443
 * @ring: engine to be used with the context.
1814
 *
2444
 *
1815
 * This function can be called more than once, with different engines, if we plan
2445
 * This function can be called more than once, with different engines, if we plan
1816
 * to use the context with them. The context backing objects and the ringbuffers
2446
 * to use the context with them. The context backing objects and the ringbuffers
1817
 * (specially the ringbuffer backing objects) suck a lot of memory up, and that's why
2447
 * (specially the ringbuffer backing objects) suck a lot of memory up, and that's why
1818
 * the creation is a deferred call: it's better to make sure first that we need to use
2448
 * the creation is a deferred call: it's better to make sure first that we need to use
1819
 * a given ring with the context.
2449
 * a given ring with the context.
1820
 *
2450
 *
1821
 * Return: non-zero on error.
2451
 * Return: non-zero on error.
1822
 */
2452
 */
-
 
2453
 
1823
int intel_lr_context_deferred_create(struct intel_context *ctx,
2454
int intel_lr_context_deferred_alloc(struct intel_context *ctx,
1824
				     struct intel_engine_cs *ring)
2455
				     struct intel_engine_cs *ring)
1825
{
2456
{
1826
	const bool is_global_default_ctx = (ctx == ring->default_context);
-
 
1827
	struct drm_device *dev = ring->dev;
2457
	struct drm_device *dev = ring->dev;
1828
	struct drm_i915_gem_object *ctx_obj;
2458
	struct drm_i915_gem_object *ctx_obj;
1829
	uint32_t context_size;
2459
	uint32_t context_size;
1830
	struct intel_ringbuffer *ringbuf;
2460
	struct intel_ringbuffer *ringbuf;
1831
	int ret;
2461
	int ret;
1832
 
2462
 
1833
	WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
2463
	WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
1834
	if (ctx->engine[ring->id].state)
2464
	WARN_ON(ctx->engine[ring->id].state);
1835
		return 0;
-
 
1836
 
2465
 
1837
	context_size = round_up(get_lr_context_size(ring), 4096);
2466
	context_size = round_up(get_lr_context_size(ring), 4096);
1838
 
2467
 
1839
	ctx_obj = i915_gem_alloc_context_obj(dev, context_size);
-
 
1840
	if (IS_ERR(ctx_obj)) {
2468
	/* One extra page as the sharing data between driver and GuC */
1841
		ret = PTR_ERR(ctx_obj);
-
 
1842
		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed: %d\n", ret);
-
 
1843
		return ret;
-
 
1844
	}
-
 
1845
 
2469
	context_size += PAGE_SIZE * LRC_PPHWSP_PN;
1846
	if (is_global_default_ctx) {
2470
 
1847
		ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
2471
	ctx_obj = i915_gem_alloc_object(dev, context_size);
1848
		if (ret) {
-
 
1849
			DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
-
 
1850
					ret);
2472
	if (!ctx_obj) {
1851
			drm_gem_object_unreference(&ctx_obj->base);
2473
		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
-
 
2474
		return -ENOMEM;
-
 
2475
	}
-
 
2476
 
-
 
2477
	ringbuf = intel_engine_create_ringbuffer(ring, 4 * PAGE_SIZE);
-
 
2478
	if (IS_ERR(ringbuf)) {
1852
			return ret;
2479
		ret = PTR_ERR(ringbuf);
1853
		}
2480
		goto error_deref_obj;
1854
	}
2481
	}
1855
 
2482
 
1856
	ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
-
 
1857
	if (!ringbuf) {
-
 
1858
		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
2483
	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
1859
				ring->name);
2484
	if (ret) {
1860
		ret = -ENOMEM;
2485
		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
1861
		goto error_unpin_ctx;
2486
		goto error_ringbuf;
1862
	}
-
 
1863
 
2487
	}
1864
	ringbuf->ring = ring;
-
 
1865
	ringbuf->FIXME_lrc_ctx = ctx;
-
 
1866
 
-
 
1867
	ringbuf->size = 32 * PAGE_SIZE;
2488
 
1868
	ringbuf->effective_size = ringbuf->size;
2489
	ctx->engine[ring->id].ringbuf = ringbuf;
1869
	ringbuf->head = 0;
2490
	ctx->engine[ring->id].state = ctx_obj;
1870
	ringbuf->tail = 0;
2491
 
1871
	ringbuf->space = ringbuf->size;
-
 
1872
	ringbuf->last_retired_head = -1;
2492
	if (ctx != ring->default_context && ring->init_context) {
1873
 
2493
		struct drm_i915_gem_request *req;
1874
	if (ringbuf->obj == NULL) {
2494
 
1875
		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
2495
		ret = i915_gem_request_alloc(ring,
1876
		if (ret) {
2496
			ctx, &req);
1877
			DRM_DEBUG_DRIVER(
-
 
1878
				"Failed to allocate ringbuffer obj %s: %d\n",
2497
		if (ret) {
1879
				ring->name, ret);
-
 
1880
			goto error_free_rbuf;
2498
			DRM_ERROR("ring create req: %d\n",
1881
		}
2499
				ret);
-
 
2500
			goto error_ringbuf;
1882
 
2501
		}
1883
		if (is_global_default_ctx) {
2502
 
-
 
2503
		ret = ring->init_context(req);
1884
			ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
2504
		if (ret) {
-
 
2505
			DRM_ERROR("ring init context: %d\n",
-
 
2506
				ret);
-
 
2507
			i915_gem_request_cancel(req);
-
 
2508
			goto error_ringbuf;
-
 
2509
		}
-
 
2510
		i915_add_request_no_flush(req);
-
 
2511
	}
-
 
2512
	return 0;
1885
			if (ret) {
2513
 
1886
				DRM_ERROR(
2514
error_ringbuf:
1887
					"Failed to pin and map ringbuffer %s: %d\n",
2515
	intel_ringbuffer_free(ringbuf);
-
 
2516
error_deref_obj:
1888
					ring->name, ret);
2517
	drm_gem_object_unreference(&ctx_obj->base);
1889
				goto error_destroy_rbuf;
2518
	ctx->engine[ring->id].ringbuf = NULL;
1890
			}
2519
	ctx->engine[ring->id].state = NULL;
-
 
2520
	return ret;
-
 
2521
}
1891
		}
2522
 
-
 
2523
void intel_lr_context_reset(struct drm_device *dev,
1892
 
2524
			struct intel_context *ctx)
-
 
2525
{
-
 
2526
	struct drm_i915_private *dev_priv = dev->dev_private;
1893
	}
2527
	struct intel_engine_cs *ring;
1894
 
2528
	int i;
1895
	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
2529
 
1896
	if (ret) {
-
 
1897
		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
2530
	for_each_ring(ring, dev_priv, i) {
1898
		goto error;
2531
		struct drm_i915_gem_object *ctx_obj =
1899
	}
-
 
1900
 
2532
				ctx->engine[ring->id].state;
-
 
2533
		struct intel_ringbuffer *ringbuf =
-
 
2534
				ctx->engine[ring->id].ringbuf;
1901
	ctx->engine[ring->id].ringbuf = ringbuf;
-
 
1902
	ctx->engine[ring->id].state = ctx_obj;
-
 
1903
 
-
 
1904
	if (ctx == ring->default_context)
2535
		uint32_t *reg_state;
1905
		lrc_setup_hardware_status_page(ring, ctx_obj);
2536
		struct page *page;
1906
 
-
 
1907
	if (ring->id == RCS && !ctx->rcs_initialized) {
-
 
1908
		if (ring->init_context) {
-
 
1909
			ret = ring->init_context(ring, ctx);
-
 
1910
			if (ret)
2537
 
1911
				DRM_ERROR("ring init context: %d\n", ret);
-
 
1912
		}
-
 
1913
 
-
 
1914
		ret = intel_lr_context_render_state_init(ring, ctx);
-
 
1915
		if (ret) {
-
 
1916
			DRM_ERROR("Init render state failed: %d\n", ret);
-
 
1917
			ctx->engine[ring->id].ringbuf = NULL;
2538
		if (!ctx_obj)
1918
			ctx->engine[ring->id].state = NULL;
-
 
1919
			goto error;
2539
			continue;
1920
		}
-
 
1921
		ctx->rcs_initialized = true;
-
 
1922
	}
2540
 
1923
 
2541
		if (i915_gem_object_get_pages(ctx_obj)) {
1924
	return 0;
2542
			WARN(1, "Failed get_pages for context obj\n");
1925
 
2543
			continue;
1926
error:
2544
		}
1927
	if (is_global_default_ctx)
2545
		page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
1928
		intel_unpin_ringbuffer_obj(ringbuf);
2546
		reg_state = kmap_atomic(page);
1929
error_destroy_rbuf:
2547
 
1930
	intel_destroy_ringbuffer_obj(ringbuf);
2548
		reg_state[CTX_RING_HEAD+1] = 0;
1931
error_free_rbuf:
2549
		reg_state[CTX_RING_TAIL+1] = 0;
1932
	kfree(ringbuf);
2550
 
1933
error_unpin_ctx:
2551
		kunmap_atomic(reg_state);
1934
	if (is_global_default_ctx)
2552
 
1935
		i915_gem_object_ggtt_unpin(ctx_obj);
2553
		ringbuf->head = 0;
1936
	drm_gem_object_unreference(&ctx_obj->base);
2554
		ringbuf->tail = 0;
1937
	return ret;
2555
	}
1938
}
2556
}
1939
>
2557
>
1940
>
2558
>
1941
>
2559
>
1942
>
2560
>
1943
enum>
2561
>
1944
enum>
2562
>
1945
#define>
2563
>
1946
#define>
2564
>
1947
#define>
2565
>
1948
#define>
2566
>
1949
#define>
2567
 
1950
#define>
2568
#define>
1951
#define>
2569
 
1952
#define>
2570
#define>
-
 
2571
#define>
-
 
2572
#define>
-
 
2573
#define>
-
 
2574
#define>
-
 
2575
#define>
-
 
2576
#define>
-
 
2577
#define>
-
 
2578
#define>