Rev 4418 | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 4418 | Rev 4547 | ||
---|---|---|---|
1 | ; Implementation of periodic transaction scheduler for USB. |
1 | ; Implementation of periodic transaction scheduler for USB. |
2 | ; Bandwidth dedicated to periodic transactions is limited, so |
2 | ; Bandwidth dedicated to periodic transactions is limited, so |
3 | ; different pipes should be scheduled as uniformly as possible. |
3 | ; different pipes should be scheduled as uniformly as possible. |
4 | 4 | ||
5 | ; USB1 scheduler. |
5 | ; USB1 scheduler. |
6 | ; Algorithm is simple: |
6 | ; Algorithm is simple: |
7 | ; when adding a pipe, optimize the following quantity: |
7 | ; when adding a pipe, optimize the following quantity: |
8 | ; * for every millisecond, take all bandwidth scheduled to periodic transfers, |
8 | ; * for every millisecond, take all bandwidth scheduled to periodic transfers, |
9 | ; * calculate maximum over all milliseconds, |
9 | ; * calculate maximum over all milliseconds, |
10 | ; * select a variant which minimizes that maximum; |
10 | ; * select a variant which minimizes that maximum; |
11 | ; when removing a pipe, do nothing (except for bookkeeping). |
11 | ; when removing a pipe, do nothing (except for bookkeeping). |
12 | 12 | ||
13 | ; The caller must provide CONTROLLER_NAME define. |
13 | ; The caller must provide CONTROLLER_NAME define. |
14 | macro define_controller_name name |
14 | macro define_controller_name name |
15 | { |
15 | { |
16 | _hci_static_ep.SoftwarePart = name # _static_ep.SoftwarePart |
16 | _hci_static_ep.SoftwarePart = name # _static_ep.SoftwarePart |
17 | _hci_static_ep.NextList = name # _static_ep.NextList |
17 | _hci_static_ep.NextList = name # _static_ep.NextList |
18 | sizeof._hci_static_ep = sizeof. # name # _static_ep |
18 | sizeof._hci_static_ep = sizeof. # name # _static_ep |
19 | } |
19 | } |
20 | 20 | ||
21 | ; Select a list for a new pipe. |
21 | ; Select a list for a new pipe. |
22 | ; in: esi -> usb_controller, maxpacket, type, interval can be found in the stack |
22 | ; in: esi -> usb_controller, maxpacket, type, interval can be found in the stack |
23 | ; in: ecx = 2 * maximal interval = total number of periodic lists + 1 |
23 | ; in: ecx = 2 * maximal interval = total number of periodic lists + 1 |
24 | ; in: edx -> {u|o}hci_static_ep for the first list |
24 | ; in: edx -> {u|o}hci_static_ep for the first list |
25 | ; in: eax -> byte past {u|o}hci_static_ep for the last list in the first group |
25 | ; in: eax -> byte past {u|o}hci_static_ep for the last list in the first group |
26 | ; out: edx -> usb_static_ep for the selected list or zero if failed |
26 | ; out: edx -> usb_static_ep for the selected list or zero if failed |
27 | proc usb1_select_interrupt_list |
27 | proc usb1_select_interrupt_list |
28 | ; inherit some variables from usb_open_pipe |
28 | ; inherit some variables from usb_open_pipe |
29 | virtual at ebp-12 |
29 | virtual at ebp-12 |
30 | .speed db ? |
30 | .speed db ? |
31 | rb 3 |
31 | rb 3 |
32 | .bandwidth dd ? |
32 | .bandwidth dd ? |
33 | .target dd ? |
33 | .target dd ? |
34 | dd ? |
34 | dd ? |
35 | dd ? |
35 | dd ? |
36 | .config_pipe dd ? |
36 | .config_pipe dd ? |
37 | .endpoint dd ? |
37 | .endpoint dd ? |
38 | .maxpacket dd ? |
38 | .maxpacket dd ? |
39 | .type dd ? |
39 | .type dd ? |
40 | .interval dd ? |
40 | .interval dd ? |
41 | end virtual |
41 | end virtual |
42 | push ebx edi ; save used registers to be stdcall |
42 | push ebx edi ; save used registers to be stdcall |
43 | push eax ; save eax for checks in step 3 |
43 | push eax ; save eax for checks in step 3 |
44 | ; 1. Only intervals 2^k ms can be supported. |
44 | ; 1. Only intervals 2^k ms can be supported. |
45 | ; The core specification says that the real interval should not be greater |
45 | ; The core specification says that the real interval should not be greater |
46 | ; than the interval given by the endpoint descriptor, but can be less. |
46 | ; than the interval given by the endpoint descriptor, but can be less. |
47 | ; Determine the actual interval as 2^k ms. |
47 | ; Determine the actual interval as 2^k ms. |
48 | mov eax, ecx |
48 | mov eax, ecx |
49 | ; 1a. Set [.interval] to 1 if it was zero; leave it as is otherwise |
49 | ; 1a. Set [.interval] to 1 if it was zero; leave it as is otherwise |
50 | cmp [.interval], 1 |
50 | cmp [.interval], 1 |
51 | adc [.interval], 0 |
51 | adc [.interval], 0 |
52 | ; 1b. Divide ecx by two while it is strictly greater than [.interval]. |
52 | ; 1b. Divide ecx by two while it is strictly greater than [.interval]. |
53 | @@: |
53 | @@: |
54 | shr ecx, 1 |
54 | shr ecx, 1 |
55 | cmp [.interval], ecx |
55 | cmp [.interval], ecx |
56 | jb @b |
56 | jb @b |
57 | ; ecx = the actual interval |
57 | ; ecx = the actual interval |
58 | ; |
58 | ; |
59 | ; For example, let ecx = 8, eax = 64. |
59 | ; For example, let ecx = 8, eax = 64. |
60 | ; The scheduler space is 32 milliseconds, |
60 | ; The scheduler space is 32 milliseconds, |
61 | ; we need to schedule something every 8 ms; |
61 | ; we need to schedule something every 8 ms; |
62 | ; there are 8 variants: schedule at times 0,8,16,24, |
62 | ; there are 8 variants: schedule at times 0,8,16,24, |
63 | ; schedule at times 1,9,17,25,..., schedule at times 7,15,23,31. |
63 | ; schedule at times 1,9,17,25,..., schedule at times 7,15,23,31. |
64 | ; Now concentrate: there are three nested loops, |
64 | ; Now concentrate: there are three nested loops, |
65 | ; * the innermost loop calculates the total periodic bandwidth scheduled |
65 | ; * the innermost loop calculates the total periodic bandwidth scheduled |
66 | ; in the given millisecond, |
66 | ; in the given millisecond, |
67 | ; * the intermediate loop calculates the maximum over all milliseconds |
67 | ; * the intermediate loop calculates the maximum over all milliseconds |
68 | ; in the given variant, that is the quantity we're trying to minimize, |
68 | ; in the given variant, that is the quantity we're trying to minimize, |
69 | ; * the outermost loop checks all variants. |
69 | ; * the outermost loop checks all variants. |
70 | ; 2. Calculate offset between the first list and the first list for the |
70 | ; 2. Calculate offset between the first list and the first list for the |
71 | ; selected interval, in bytes; save in the stack for step 4. |
71 | ; selected interval, in bytes; save in the stack for step 4. |
72 | sub eax, ecx |
72 | sub eax, ecx |
73 | sub eax, ecx |
73 | sub eax, ecx |
74 | imul eax, sizeof._hci_static_ep |
74 | imul eax, sizeof._hci_static_ep |
75 | push eax |
75 | push eax |
76 | imul ebx, ecx, sizeof._hci_static_ep |
76 | imul ebx, ecx, sizeof._hci_static_ep |
77 | ; 3. Select the best variant. |
77 | ; 3. Select the best variant. |
78 | ; 3a. The outermost loop. |
78 | ; 3a. The outermost loop. |
79 | ; Prepare for the loop: set the current optimal bandwidth to maximum |
79 | ; Prepare for the loop: set the current optimal bandwidth to maximum |
80 | ; possible value (so that any variant will pass the first comparison), |
80 | ; possible value (so that any variant will pass the first comparison), |
81 | ; calculate delta for the intermediate loop. |
81 | ; calculate delta for the intermediate loop. |
82 | or [.bandwidth], -1 |
82 | or [.bandwidth], -1 |
83 | .varloop: |
83 | .varloop: |
84 | ; 3b. The intermediate loop. |
84 | ; 3b. The intermediate loop. |
85 | ; Prepare for the loop: set the maximum to be calculated to zero, |
85 | ; Prepare for the loop: set the maximum to be calculated to zero, |
86 | ; save counter of the outermost loop. |
86 | ; save counter of the outermost loop. |
87 | xor edi, edi |
87 | xor edi, edi |
88 | push edx |
88 | push edx |
89 | virtual at esp |
89 | virtual at esp |
90 | .cur_variant dd ? ; step 3b |
90 | .cur_variant dd ? ; step 3b |
91 | .result_delta dd ? ; step 2 |
91 | .result_delta dd ? ; step 2 |
92 | .group1_limit dd ? ; function prolog |
92 | .group1_limit dd ? ; function prolog |
93 | end virtual |
93 | end virtual |
94 | .calc_max_bandwidth: |
94 | .calc_max_bandwidth: |
95 | ; 3c. The innermost loop. Sum over all lists. |
95 | ; 3c. The innermost loop. Sum over all lists. |
96 | xor eax, eax |
96 | xor eax, eax |
97 | push edx |
97 | push edx |
98 | .calc_bandwidth: |
98 | .calc_bandwidth: |
99 | add eax, [edx+_hci_static_ep.SoftwarePart+usb_static_ep.Bandwidth] |
99 | add eax, [edx+_hci_static_ep.SoftwarePart+usb_static_ep.Bandwidth] |
100 | mov edx, [edx+_hci_static_ep.NextList] |
100 | mov edx, [edx+_hci_static_ep.NextList] |
101 | test edx, edx |
101 | test edx, edx |
102 | jnz .calc_bandwidth |
102 | jnz .calc_bandwidth |
103 | pop edx |
103 | pop edx |
104 | ; 3d. The intermediate loop continued: update maximum. |
104 | ; 3d. The intermediate loop continued: update maximum. |
105 | cmp eax, edi |
105 | cmp eax, edi |
106 | jb @f |
106 | jb @f |
107 | mov edi, eax |
107 | mov edi, eax |
108 | @@: |
108 | @@: |
109 | ; 3e. The intermediate loop continued: advance counter. |
109 | ; 3e. The intermediate loop continued: advance counter. |
110 | add edx, ebx |
110 | add edx, ebx |
111 | cmp edx, [.group1_limit] |
111 | cmp edx, [.group1_limit] |
112 | jb .calc_max_bandwidth |
112 | jb .calc_max_bandwidth |
113 | ; 3e. The intermediate loop done: restore counter of the outermost loop. |
113 | ; 3e. The intermediate loop done: restore counter of the outermost loop. |
114 | pop edx |
114 | pop edx |
115 | ; 3f. The outermost loop continued: if the current variant is |
115 | ; 3f. The outermost loop continued: if the current variant is |
116 | ; better (maybe not strictly) then the previous optimum, update |
116 | ; better (maybe not strictly) then the previous optimum, update |
117 | ; the optimal bandwidth and resulting list. |
117 | ; the optimal bandwidth and resulting list. |
118 | cmp edi, [.bandwidth] |
118 | cmp edi, [.bandwidth] |
119 | ja @f |
119 | ja @f |
120 | mov [.bandwidth], edi |
120 | mov [.bandwidth], edi |
121 | mov [.target], edx |
121 | mov [.target], edx |
122 | @@: |
122 | @@: |
123 | ; 3g. The outermost loop continued: advance counter. |
123 | ; 3g. The outermost loop continued: advance counter. |
124 | add edx, sizeof._hci_static_ep |
124 | add edx, sizeof._hci_static_ep |
125 | dec ecx |
125 | dec ecx |
126 | jnz .varloop |
126 | jnz .varloop |
127 | ; 4. Calculate bandwidth for the new pipe. |
127 | ; 4. Calculate bandwidth for the new pipe. |
128 | mov eax, [.maxpacket] |
128 | mov eax, [.maxpacket] |
129 | mov cl, [.speed] |
129 | mov cl, [.speed] |
130 | mov ch, byte [.endpoint] |
130 | mov ch, byte [.endpoint] |
131 | and ch, 80h |
131 | and ch, 80h |
132 | call calc_usb1_bandwidth |
132 | call calc_usb1_bandwidth |
133 | ; 5. Get the pointer to the best list. |
133 | ; 5. Get the pointer to the best list. |
134 | pop edx ; restore value from step 2 |
134 | pop edx ; restore value from step 2 |
135 | pop ecx ; purge stack var from prolog |
135 | pop ecx ; purge stack var from prolog |
136 | add edx, [.target] |
136 | add edx, [.target] |
137 | ; 6. Check that bandwidth for the new pipe plus old bandwidth |
137 | ; 6. Check that bandwidth for the new pipe plus old bandwidth |
138 | ; still fits to maximum allowed by the core specification, 90% of 12000 bits. |
138 | ; still fits to maximum allowed by the core specification, 90% of 12000 bits. |
139 | mov ecx, eax |
139 | mov ecx, eax |
140 | add ecx, [.bandwidth] |
140 | add ecx, [.bandwidth] |
141 | cmp ecx, 10800 |
141 | cmp ecx, 10800 |
142 | ja .no_bandwidth |
142 | ja .no_bandwidth |
143 | ; 7. Convert {o|u}hci_static_ep to usb_static_ep, update bandwidth and return. |
143 | ; 7. Convert {o|u}hci_static_ep to usb_static_ep, update bandwidth and return. |
144 | add edx, _hci_static_ep.SoftwarePart |
144 | add edx, _hci_static_ep.SoftwarePart |
145 | add [edx+usb_static_ep.Bandwidth], eax |
145 | add [edx+usb_static_ep.Bandwidth], eax |
146 | pop edi ebx ; restore used registers to be stdcall |
146 | pop edi ebx ; restore used registers to be stdcall |
147 | ret |
147 | ret |
148 | .no_bandwidth: |
148 | .no_bandwidth: |
149 | dbgstr 'Periodic bandwidth limit reached' |
149 | dbgstr 'Periodic bandwidth limit reached' |
150 | xor edx, edx |
150 | xor edx, edx |
151 | pop edi ebx |
151 | pop edi ebx |
152 | ret |
152 | ret |
153 | endp |
153 | endp |
154 | 154 | ||
155 | ; Pipe is removing, update the corresponding lists. |
155 | ; Pipe is removing, update the corresponding lists. |
156 | ; We do not reorder anything, so just update book-keeping variable |
156 | ; We do not reorder anything, so just update book-keeping variable |
157 | ; in the list header. |
157 | ; in the list header. |
158 | proc usb1_interrupt_list_unlink |
158 | proc usb1_interrupt_list_unlink |
159 | virtual at esp |
159 | virtual at esp |
160 | dd ? ; return address |
160 | dd ? ; return address |
161 | .maxpacket dd ? |
161 | .maxpacket dd ? |
162 | .lowspeed db ? |
162 | .lowspeed db ? |
163 | .direction db ? |
163 | .direction db ? |
164 | rb 2 |
164 | rb 2 |
165 | end virtual |
165 | end virtual |
166 | ; calculate bandwidth on the bus |
166 | ; calculate bandwidth on the bus |
167 | mov eax, [.maxpacket] |
167 | mov eax, [.maxpacket] |
168 | mov ecx, dword [.lowspeed] |
168 | mov ecx, dword [.lowspeed] |
169 | call calc_usb1_bandwidth |
169 | call calc_usb1_bandwidth |
170 | ; find list header |
- | |
171 | mov edx, ebx |
- | |
172 | @@: |
- | |
173 | mov edx, [edx+usb_pipe.NextVirt] |
170 | mov edx, [ebx+usb_pipe.BaseList] |
174 | cmp [edx+usb_pipe.Controller], esi |
- | |
175 | jz @b |
- | |
176 | ; subtract pipe bandwidth |
171 | ; subtract pipe bandwidth |
177 | sub [edx+usb_static_ep.Bandwidth], eax |
172 | sub [edx+usb_static_ep.Bandwidth], eax |
178 | ret 8 |
173 | ret 8 |
179 | endp |
174 | endp |
180 | 175 | ||
181 | ; Helper procedure for USB1 scheduler: calculate bandwidth on the bus. |
176 | ; Helper procedure for USB1 scheduler: calculate bandwidth on the bus. |
182 | ; in: low 11 bits of eax = payload size in bytes |
177 | ; in: low 11 bits of eax = payload size in bytes |
183 | ; in: cl = 0 - full-speed, nonzero - high-speed |
178 | ; in: cl = 0 - full-speed, nonzero - high-speed |
184 | ; in: ch = 0 - OUT, nonzero - IN |
179 | ; in: ch = 0 - OUT, nonzero - IN |
185 | ; out: eax = maximal bandwidth in FS-bits |
180 | ; out: eax = maximal bandwidth in FS-bits |
186 | proc calc_usb1_bandwidth |
181 | proc calc_usb1_bandwidth |
187 | and eax, (1 shl 11) - 1 ; get payload for one transaction |
182 | and eax, (1 shl 11) - 1 ; get payload for one transaction |
188 | add eax, 3 ; add 3 bytes for other fields in data packet, PID+CRC16 |
183 | add eax, 3 ; add 3 bytes for other fields in data packet, PID+CRC16 |
189 | test cl, cl |
184 | test cl, cl |
190 | jnz .low_speed |
185 | jnz .low_speed |
191 | ; Multiply by 8 for bytes -> bits, by 7/6 to accomodate bit stuffing |
186 | ; Multiply by 8 for bytes -> bits, by 7/6 to accomodate bit stuffing |
192 | ; and by 401/400 for IN transfers to accomodate timers difference |
187 | ; and by 401/400 for IN transfers to accomodate timers difference |
193 | ; 9+107/300 for IN transfers, 9+1/3 for OUT transfers |
188 | ; 9+107/300 for IN transfers, 9+1/3 for OUT transfers |
194 | ; For 0 <= eax < 09249355h, floor(eax * 107/300) = floor(eax * 5B4E81B5h / 2^32). |
189 | ; For 0 <= eax < 09249355h, floor(eax * 107/300) = floor(eax * 5B4E81B5h / 2^32). |
195 | ; For 0 <= eax < 80000000h, floor(eax / 3) = floor(eax * 55555556h / 2^32). |
190 | ; For 0 <= eax < 80000000h, floor(eax / 3) = floor(eax * 55555556h / 2^32). |
196 | mov edx, 55555556h |
191 | mov edx, 55555556h |
197 | test ch, ch |
192 | test ch, ch |
198 | jz @f |
193 | jz @f |
199 | mov edx, 5B4E81B5h |
194 | mov edx, 5B4E81B5h |
200 | @@: |
195 | @@: |
201 | lea ecx, [eax*9] |
196 | lea ecx, [eax*9] |
202 | mul edx |
197 | mul edx |
203 | ; Add 93 extra bits: 39 bits for Token packet (8 for SYNC, 24 for token+address, |
198 | ; Add 93 extra bits: 39 bits for Token packet (8 for SYNC, 24 for token+address, |
204 | ; 4 extra bits for possible bit stuffing in token+address, 3 for EOP), |
199 | ; 4 extra bits for possible bit stuffing in token+address, 3 for EOP), |
205 | ; 18 bits for bus turn-around, 11 bits for SYNC+EOP in Data packet plus 1 bit |
200 | ; 18 bits for bus turn-around, 11 bits for SYNC+EOP in Data packet plus 1 bit |
206 | ; for possible timers difference, 2 bits for inter-packet delay, 20 bits for |
201 | ; for possible timers difference, 2 bits for inter-packet delay, 20 bits for |
207 | ; Handshake packet, 2 bits for another inter-packet delay. |
202 | ; Handshake packet, 2 bits for another inter-packet delay. |
208 | lea eax, [ecx+edx+93] |
203 | lea eax, [ecx+edx+93] |
209 | ret |
204 | ret |
210 | .low_speed: |
205 | .low_speed: |
211 | ; Multiply by 8 for bytes -> bits, by 7/6 to accomodate bit stuffing, |
206 | ; Multiply by 8 for bytes -> bits, by 7/6 to accomodate bit stuffing, |
212 | ; by 8 for LS -> FS and by 406/50 for IN transfers to accomodate timers difference. |
207 | ; by 8 for LS -> FS and by 406/50 for IN transfers to accomodate timers difference. |
213 | ; 75+59/75 for IN transfers, 74+2/3 for OUT transfers. |
208 | ; 75+59/75 for IN transfers, 74+2/3 for OUT transfers. |
214 | mov edx, 0AAAAAABh |
209 | mov edx, 0AAAAAABh |
215 | test ch, ch |
210 | test ch, ch |
216 | mov ecx, 74 |
211 | mov ecx, 74 |
217 | jz @f |
212 | jz @f |
218 | mov edx, 0C962FC97h |
213 | mov edx, 0C962FC97h |
219 | inc ecx |
214 | inc ecx |
220 | @@: |
215 | @@: |
221 | imul ecx, eax |
216 | imul ecx, eax |
222 | mul edx |
217 | mul edx |
223 | ; Add 778 extra bits: |
218 | ; Add 778 extra bits: |
224 | ; 16 bits for PRE packet, 4 bits for hub delay, 8*39 bits for Token packet |
219 | ; 16 bits for PRE packet, 4 bits for hub delay, 8*39 bits for Token packet |
225 | ; 8*18 bits for bus turn-around |
220 | ; 8*18 bits for bus turn-around |
226 | ; (406/50)*11 bits for SYNC+EOP in Data packet, |
221 | ; (406/50)*11 bits for SYNC+EOP in Data packet, |
227 | ; 8*2 bits for inter-packet delay, |
222 | ; 8*2 bits for inter-packet delay, |
228 | ; 16 bits for PRE packet, 4 bits for hub delay, 8*20 bits for Handshake packet, |
223 | ; 16 bits for PRE packet, 4 bits for hub delay, 8*20 bits for Handshake packet, |
229 | ; 8*2 bits for another inter-packet delay. |
224 | ; 8*2 bits for another inter-packet delay. |
230 | lea eax, [ecx+edx+778] |
225 | lea eax, [ecx+edx+778] |
231 | ret |
226 | ret |
232 | endp>=>>=> |
227 | endp>=>>=> |