Rev 3826 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 3826 | Rev 4418 | ||
---|---|---|---|
Line 1... | Line 1... | ||
1 | ; Implementation of periodic transaction scheduler for USB. |
1 | ; Implementation of periodic transaction scheduler for USB. |
2 | ; Bandwidth dedicated to periodic transactions is limited, so |
2 | ; Bandwidth dedicated to periodic transactions is limited, so |
3 | ; different pipes should be scheduled as uniformly as possible. |
3 | ; different pipes should be scheduled as uniformly as possible. |
Line 4... | Line -... | ||
4 | - | ||
5 | ; USB1 scheduler. |
- | |
6 | ; Algorithm is simple: |
- | |
7 | ; when adding a pipe, optimize the following quantity: |
- | |
8 | ; * for every millisecond, take all bandwidth scheduled to periodic transfers, |
- | |
9 | ; * calculate maximum over all milliseconds, |
- | |
10 | ; * select a variant which minimizes that maximum; |
- | |
11 | ; when removing a pipe, do nothing (except for bookkeeping). |
- | |
12 | - | ||
13 | ; sanity check: structures in UHCI and OHCI should be the same |
- | |
14 | if (sizeof.ohci_static_ep=sizeof.uhci_static_ep)&(ohci_static_ep.SoftwarePart=uhci_static_ep.SoftwarePart)&(ohci_static_ep.NextList=uhci_static_ep.NextList) |
- | |
15 | ; Select a list for a new pipe. |
- | |
16 | ; in: esi -> usb_controller, maxpacket, type, interval can be found in the stack |
- | |
17 | ; in: ecx = 2 * maximal interval = total number of periodic lists + 1 |
- | |
18 | ; in: edx -> {u|o}hci_static_ep for the first list |
- | |
19 | ; in: eax -> byte past {u|o}hci_static_ep for the last list in the first group |
- | |
20 | ; out: edx -> usb_static_ep for the selected list or zero if failed |
- | |
21 | proc usb1_select_interrupt_list |
- | |
22 | ; inherit some variables from usb_open_pipe |
- | |
23 | virtual at ebp-12 |
- | |
24 | .speed db ? |
- | |
25 | rb 3 |
- | |
26 | .bandwidth dd ? |
- | |
27 | .target dd ? |
- | |
28 | dd ? |
- | |
29 | dd ? |
- | |
30 | .config_pipe dd ? |
- | |
31 | .endpoint dd ? |
- | |
32 | .maxpacket dd ? |
- | |
33 | .type dd ? |
- | |
34 | .interval dd ? |
- | |
35 | end virtual |
- | |
36 | push ebx edi ; save used registers to be stdcall |
- | |
37 | push eax ; save eax for checks in step 3 |
- | |
38 | ; 1. Only intervals 2^k ms can be supported. |
- | |
39 | ; The core specification says that the real interval should not be greater |
- | |
40 | ; than the interval given by the endpoint descriptor, but can be less. |
- | |
41 | ; Determine the actual interval as 2^k ms. |
- | |
42 | mov eax, ecx |
- | |
43 | ; 1a. Set [.interval] to 1 if it was zero; leave it as is otherwise |
- | |
44 | cmp [.interval], 1 |
- | |
45 | adc [.interval], 0 |
- | |
46 | ; 1b. Divide ecx by two while it is strictly greater than [.interval]. |
- | |
47 | @@: |
- | |
48 | shr ecx, 1 |
- | |
49 | cmp [.interval], ecx |
- | |
50 | jb @b |
- | |
51 | ; ecx = the actual interval |
- | |
52 | ; |
- | |
53 | ; For example, let ecx = 8, eax = 64. |
- | |
54 | ; The scheduler space is 32 milliseconds, |
- | |
55 | ; we need to schedule something every 8 ms; |
- | |
56 | ; there are 8 variants: schedule at times 0,8,16,24, |
- | |
57 | ; schedule at times 1,9,17,25,..., schedule at times 7,15,23,31. |
- | |
58 | ; Now concentrate: there are three nested loops, |
- | |
59 | ; * the innermost loop calculates the total periodic bandwidth scheduled |
- | |
60 | ; in the given millisecond, |
- | |
61 | ; * the intermediate loop calculates the maximum over all milliseconds |
- | |
62 | ; in the given variant, that is the quantity we're trying to minimize, |
- | |
63 | ; * the outermost loop checks all variants. |
- | |
64 | ; 2. Calculate offset between the first list and the first list for the |
- | |
65 | ; selected interval, in bytes; save in the stack for step 5. |
- | |
66 | sub eax, ecx |
- | |
67 | sub eax, ecx |
- | |
68 | imul eax, sizeof.ohci_static_ep |
- | |
69 | push eax |
- | |
70 | imul ebx, ecx, sizeof.ohci_static_ep |
- | |
71 | ; 3. Select the best variant. |
- | |
72 | ; 3a. The outermost loop. |
- | |
73 | ; Prepare for the loop: set the current optimal bandwidth to maximum |
- | |
74 | ; possible value (so that any variant will pass the first comparison); |
- | |
75 | ; the delta for the intermediate loop is already in ebx (interval * list size). |
- | |
76 | or [.bandwidth], -1 |
- | |
77 | .varloop: |
- | |
78 | ; 3b. The intermediate loop. |
- | |
79 | ; Prepare for the loop: set the maximum to be calculated to zero, |
- | |
80 | ; save counter of the outermost loop. |
- | |
81 | xor edi, edi |
- | |
82 | push edx |
- | |
83 | virtual at esp |
- | |
84 | .cur_variant dd ? ; step 3b |
- | |
85 | .result_delta dd ? ; step 2 |
- | |
86 | .group1_limit dd ? ; function prolog |
- | |
87 | end virtual |
- | |
88 | .calc_max_bandwidth: |
- | |
89 | ; 3c. The innermost loop. Sum over all lists. |
- | |
90 | xor eax, eax |
- | |
91 | push edx |
- | |
92 | .calc_bandwidth: |
- | |
93 | add eax, [edx+ohci_static_ep.SoftwarePart+usb_static_ep.Bandwidth] |
- | |
94 | mov edx, [edx+ohci_static_ep.NextList] |
- | |
95 | test edx, edx |
- | |
96 | jnz .calc_bandwidth |
- | |
97 | pop edx |
- | |
98 | ; 3d. The intermediate loop continued: update maximum. |
- | |
99 | cmp eax, edi |
- | |
100 | jb @f |
- | |
101 | mov edi, eax |
- | |
102 | @@: |
- | |
103 | ; 3e. The intermediate loop continued: advance counter. |
- | |
104 | add edx, ebx |
- | |
105 | cmp edx, [.group1_limit] |
- | |
106 | jb .calc_max_bandwidth |
- | |
107 | ; 3e. The intermediate loop done: restore counter of the outermost loop. |
- | |
108 | pop edx |
- | |
109 | ; 3f. The outermost loop continued: if the current variant is |
- | |
110 | ; better (maybe not strictly) than the previous optimum, update |
- | |
111 | ; the optimal bandwidth and resulting list. |
- | |
112 | cmp edi, [.bandwidth] |
- | |
113 | ja @f |
- | |
114 | mov [.bandwidth], edi |
- | |
115 | mov [.target], edx |
- | |
116 | @@: |
- | |
117 | ; 3g. The outermost loop continued: advance counter. |
- | |
118 | add edx, sizeof.ohci_static_ep |
- | |
119 | dec ecx |
- | |
120 | jnz .varloop |
- | |
121 | ; 4. Calculate bandwidth for the new pipe. |
- | |
122 | mov eax, [.maxpacket] |
- | |
123 | mov cl, [.speed] |
- | |
124 | mov ch, byte [.endpoint] |
- | |
125 | and ch, 80h |
- | |
126 | call calc_usb1_bandwidth |
- | |
127 | ; 5. Get the pointer to the best list. |
- | |
128 | pop edx ; restore value from step 2 |
- | |
129 | pop ecx ; purge stack var from prolog |
- | |
130 | add edx, [.target] |
- | |
131 | ; 6. Check that bandwidth for the new pipe plus old bandwidth |
- | |
132 | ; still fits into the maximum allowed by the core specification, 90% of 12000 bits. |
- | |
133 | mov ecx, eax |
- | |
134 | add ecx, [.bandwidth] |
- | |
135 | cmp ecx, 10800 |
- | |
136 | ja .no_bandwidth |
- | |
137 | ; 7. Convert {o|u}hci_static_ep to usb_static_ep, update bandwidth and return. |
- | |
138 | add edx, ohci_static_ep.SoftwarePart |
- | |
139 | add [edx+usb_static_ep.Bandwidth], eax |
- | |
140 | pop edi ebx ; restore used registers to be stdcall |
- | |
141 | ret |
- | |
142 | .no_bandwidth: |
- | |
143 | dbgstr 'Periodic bandwidth limit reached' |
- | |
144 | xor edx, edx |
- | |
145 | pop edi ebx |
- | |
146 | ret |
- | |
147 | endp |
- | |
148 | ; sanity check, part 2 |
- | |
149 | else |
- | |
150 | .err select_interrupt_list must be different for UHCI and OHCI |
- | |
151 | end if |
- | |
152 | - | ||
153 | ; A pipe is being removed; update the corresponding list. |
- | |
154 | ; We do not reorder anything, so just update the book-keeping variable |
- | |
155 | ; (usb_static_ep.Bandwidth) in the list header. in: ebx -> usb_pipe, esi = value matched against usb_pipe.Controller; two stack dwords are popped on return. |
- | |
156 | proc usb1_interrupt_list_unlink |
- | |
157 | virtual at esp |
- | |
158 | dd ? ; return address |
- | |
159 | .maxpacket dd ? |
- | |
160 | .lowspeed db ? |
- | |
161 | .direction db ? |
- | |
162 | rb 2 |
- | |
163 | end virtual |
- | |
164 | ; calculate bandwidth on the bus (cl = .lowspeed, ch = .direction after the dword load) |
- | |
165 | mov eax, [.maxpacket] |
- | |
166 | mov ecx, dword [.lowspeed] |
- | |
167 | call calc_usb1_bandwidth |
- | |
168 | ; find list header: follow NextVirt from ebx while the entry's Controller field equals esi |
- | |
169 | mov edx, ebx |
- | |
170 | @@: |
- | |
171 | mov edx, [edx+usb_pipe.NextVirt] |
- | |
172 | cmp [edx+usb_pipe.Controller], esi |
- | |
173 | jz @b |
- | |
174 | ; subtract pipe bandwidth |
- | |
175 | sub [edx+usb_static_ep.Bandwidth], eax |
- | |
176 | ret 8 |
- | |
177 | endp |
- | |
178 | - | ||
179 | ; Helper procedure for USB1 scheduler: calculate bandwidth on the bus. |
- | |
180 | ; in: low 11 bits of eax = payload size in bytes |
- | |
181 | ; in: cl = 0 - full-speed, nonzero - low-speed |
- | |
182 | ; in: ch = 0 - OUT, nonzero - IN |
- | |
183 | ; out: eax = maximal bandwidth in FS-bits; ecx and edx are overwritten |
- | |
184 | proc calc_usb1_bandwidth |
- | |
185 | and eax, (1 shl 11) - 1 ; get payload for one transaction |
- | |
186 | add eax, 3 ; add 3 bytes for other fields in data packet, PID+CRC16 |
- | |
187 | test cl, cl |
- | |
188 | jnz .low_speed |
- | |
189 | ; Multiply by 8 for bytes -> bits, by 7/6 to accommodate bit stuffing |
- | |
190 | ; and by 401/400 for IN transfers to accommodate timers difference |
- | |
191 | ; 9+107/300 for IN transfers, 9+1/3 for OUT transfers |
- | |
192 | ; For 0 <= eax < 09249355h, floor(eax * 107/300) = floor(eax * 5B4E81B5h / 2^32). |
- | |
193 | ; For 0 <= eax < 80000000h, floor(eax / 3) = floor(eax * 55555556h / 2^32). |
- | |
194 | mov edx, 55555556h |
- | |
195 | test ch, ch |
- | |
196 | jz @f |
- | |
197 | mov edx, 5B4E81B5h |
- | |
198 | @@: |
- | |
199 | lea ecx, [eax*9] |
- | |
200 | mul edx |
- | |
201 | ; Add 93 extra bits: 39 bits for Token packet (8 for SYNC, 24 for token+address, |
- | |
202 | ; 4 extra bits for possible bit stuffing in token+address, 3 for EOP), |
- | |
203 | ; 18 bits for bus turn-around, 11 bits for SYNC+EOP in Data packet plus 1 bit |
- | |
204 | ; for possible timers difference, 2 bits for inter-packet delay, 20 bits for |
- | |
205 | ; Handshake packet, 2 bits for another inter-packet delay. |
- | |
206 | lea eax, [ecx+edx+93] |
- | |
207 | ret |
- | |
208 | .low_speed: |
- | |
209 | ; Multiply by 8 for bytes -> bits, by 7/6 to accommodate bit stuffing, |
- | |
210 | ; by 8 for LS -> FS and by 406/50 for IN transfers to accommodate timers difference. |
- | |
211 | ; 75+59/75 for IN transfers, 74+2/3 for OUT transfers. |
- | |
212 | mov edx, 0AAAAAABh |
- | |
213 | test ch, ch |
- | |
214 | mov ecx, 74 |
- | |
215 | jz @f |
- | |
216 | mov edx, 0C962FC97h |
- | |
217 | inc ecx |
- | |
218 | @@: |
- | |
219 | imul ecx, eax |
- | |
220 | mul edx |
- | |
221 | ; Add 778 extra bits: |
- | |
222 | ; 16 bits for PRE packet, 4 bits for hub delay, 8*39 bits for Token packet |
- | |
223 | ; 8*18 bits for bus turn-around |
- | |
224 | ; (406/50)*11 bits for SYNC+EOP in Data packet, |
- | |
225 | ; 8*2 bits for inter-packet delay, |
- | |
226 | ; 16 bits for PRE packet, 4 bits for hub delay, 8*20 bits for Handshake packet, |
- | |
227 | ; 8*2 bits for another inter-packet delay. |
- | |
228 | lea eax, [ecx+edx+778] |
- | |
229 | ret |
- | |
230 | endp |
- | |
231 | 4 | ||
232 | ; USB2 scheduler. |
5 | ; USB2 scheduler. |
233 | ; There are two parts: high-speed pipes and split-transaction pipes. |
6 | ; There are two parts: high-speed pipes and split-transaction pipes. |
234 | ; |
7 | ; |
235 | ; High-speed scheduler uses the same algorithm as USB1 scheduler: |
8 | ; High-speed scheduler uses the same algorithm as USB1 scheduler: |
Line 828... | Line 601... | ||
828 | .best_fs_bandwidth dw ? |
601 | .best_fs_bandwidth dw ? |
829 | .variant dd ? |
602 | .variant dd ? |
830 | .variant_delta dd ? |
603 | .variant_delta dd ? |
831 | .target_delta dd ? |
604 | .target_delta dd ? |
832 | .local_vars_size = $ - .local_vars_start |
605 | .local_vars_size = $ - .local_vars_start |
- | 606 | if .local_vars_size > 24*4 |
|
- | 607 | err Modify stack frame size in |
|
- | 608 | end if |
|
Line 833... | Line 609... | ||
833 | 609 | ||
834 | .targetsmask dd ? |
610 | .targetsmask dd ? |
835 | .bandwidth dd ? |
611 | .bandwidth dd ? |
836 | .target dd ? |
612 | .target dd ? |
Line 990... | Line 766... | ||
990 | ; but without bit stuffing and timers drift. |
766 | ; but without bit stuffing and timers drift. |
991 | ; One extra TT-specific delay is added: TT think time from the hub descriptor. |
767 | ; One extra TT-specific delay is added: TT think time from the hub descriptor. |
992 | ; Similar to calc_usb1_bandwidth with corresponding changes. |
768 | ; Similar to calc_usb1_bandwidth with corresponding changes. |
993 | ; eax -> usb_hub with TT, ebx -> usb_pipe |
769 | ; eax -> usb_hub with TT, ebx -> usb_pipe |
994 | proc tt_calc_budget |
770 | proc tt_calc_budget |
995 | movzx ecx, [eax+usb_hub.HubCharacteristics] |
- | |
996 | shr ecx, 5 |
- | |
997 | and ecx, 3 ; 1+ecx = TT think time in FS-bytes |
771 | invoke usbhc_api.usb_get_tt_think_time ; ecx = TT think time in FS-bytes |
998 | mov eax, [ebx+ehci_pipe.Token-sizeof.ehci_pipe] |
772 | mov eax, [ebx+ehci_pipe.Token-sizeof.ehci_pipe] |
999 | shr eax, 16 |
773 | shr eax, 16 |
1000 | and eax, (1 shl 11) - 1 ; get data length |
774 | and eax, (1 shl 11) - 1 ; get data length |
1001 | bt [ebx+ehci_pipe.Token-sizeof.ehci_pipe], 12 |
775 | bt [ebx+ehci_pipe.Token-sizeof.ehci_pipe], 12 |
1002 | jc .low_speed |
776 | jc .low_speed |
1003 | ; Full-speed interrupt IN/OUT: |
777 | ; Full-speed interrupt IN/OUT: |
1004 | ; 33 bits for Token packet (8 for SYNC, 24 for token+address, 3 for EOP), |
778 | ; 33 bits for Token packet (8 for SYNC, 24 for token+address, 3 for EOP), |
1005 | ; 18 bits for bus turn-around, 11 bits for SYNC+EOP in Data packet, |
779 | ; 18 bits for bus turn-around, 11 bits for SYNC+EOP in Data packet, |
1006 | ; 2 bits for inter-packet delay, 19 bits for Handshake packet, |
780 | ; 2 bits for inter-packet delay, 19 bits for Handshake packet, |
1007 | ; 2 bits for another inter-packet delay. 85 bits total, pad to 11 bytes. |
781 | ; 2 bits for another inter-packet delay. 85 bits total, pad to 11 bytes. |
1008 | lea eax, [eax+11+ecx+1] |
782 | lea eax, [eax+11+ecx] |
1009 | ; 1 byte is minimal TT think time in addition to ecx. |
783 | ; NOTE(review): no "+1" here; ecx presumably already includes the minimal 1-byte TT think time - confirm. |
1010 | ret |
784 | ret |
1011 | .low_speed: |
785 | .low_speed: |
1012 | ; Low-speed interrupt IN/OUT: |
786 | ; Low-speed interrupt IN/OUT: |
1013 | ; multiply by 8 for LS -> FS, |
787 | ; multiply by 8 for LS -> FS, |
1014 | ; add 85 bytes as in full-speed interrupt and extra 5 bytes for two PRE packets |
788 | ; add 85 bytes as in full-speed interrupt and extra 5 bytes for two PRE packets |
1015 | ; and two hub delays. |
789 | ; and two hub delays. |
1016 | ; 1 byte is minimal TT think time in addition to ecx. |
790 | ; NOTE(review): no "+1" here; ecx presumably already includes the minimal 1-byte TT think time - confirm. |
1017 | lea eax, [eax*8+90+ecx+1] |
791 | lea eax, [eax*8+90+ecx] |
1018 | ret |
792 | ret |
1019 | endp |
793 | endp |
Line 1020... | Line 794... | ||
1020 | 794 | ||
1021 | ; Helper procedure for TT scheduler. |
795 | ; Helper procedure for TT scheduler. |