Subversion Repositories Kolibri OS

Rev

Rev 3725 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3555 Serge 1
; Implementation of periodic transaction scheduler for USB.
2
; Bandwidth dedicated to periodic transactions is limited, so
3
; different pipes should be scheduled as uniformly as possible.
4
 
5
; USB1 scheduler.
6
; Algorithm is simple:
7
; when adding a pipe, optimize the following quantity:
8
;  * for every millisecond, take all bandwidth scheduled to periodic transfers,
9
;  * calculate maximum over all milliseconds,
10
;  * select a variant which minimizes that maximum;
11
; when removing a pipe, do nothing (except for bookkeeping).
12
 
13
; sanity check: structures in UHCI and OHCI should be the same
14
if (sizeof.ohci_static_ep=sizeof.uhci_static_ep)&(ohci_static_ep.SoftwarePart=uhci_static_ep.SoftwarePart)&(ohci_static_ep.NextList=uhci_static_ep.NextList)
15
; Select a periodic list for a new USB1 (UHCI/OHCI) interrupt pipe.
; Every candidate ("variant") is a list of the first group; the variant whose
; busiest millisecond carries the least already reserved bandwidth is chosen,
; and on a tie the variant examined last wins.
; in: esi -> usb_controller, maxpacket, type, interval can be found in the stack
; in: ecx = 2 * maximal interval = total number of periodic lists + 1
; in: edx -> {u|o}hci_static_ep for the first list
; in: eax -> byte past {u|o}hci_static_ep for the last list in the first group
; out: edx -> usb_static_ep for the selected list or zero if failed
; ebx and edi are preserved; eax and ecx are overwritten.
proc usb1_select_interrupt_list
; Variables of usb_open_pipe, addressed through its ebp frame.
virtual at ebp-12
.speed          db      ?
                rb      3
.bandwidth      dd      ?
.target         dd      ?
                dd      ?
                dd      ?
.config_pipe    dd      ?
.endpoint       dd      ?
.maxpacket      dd      ?
.type           dd      ?
.interval       dd      ?
end virtual
        push    ebx edi         ; callee-saved registers used below
        push    eax             ; end of the first group, read in the scan loop
; Step 1. Only intervals of 2^k ms are supported. The real interval may be
; shorter than the one requested by the endpoint descriptor, never longer,
; so round the request down to a power of two.
        mov     eax, ecx        ; keep the original ecx for step 2
; A zero interval is treated as 1: cmp sets CF only when [.interval] is 0,
; and adc then adds that carry.
        cmp     [.interval], 1
        adc     [.interval], 0
; Halve ecx until it is no longer strictly greater than [.interval].
.halve_interval:
        shr     ecx, 1
        cmp     [.interval], ecx
        jb      .halve_interval
; Now ecx = actual interval = number of variants to examine.
; Example: ecx = 8, eax = 64. The scheduler space is 32 ms and something has
; to run every 8 ms, so the 8 variants are the times {0,8,16,24},
; {1,9,17,25}, ..., {7,15,23,31}.
; Three nested loops follow:
; * the innermost one sums the periodic bandwidth of one millisecond,
; * the middle one takes the maximum over the milliseconds of one variant,
; * the outer one walks over the variants.
; Step 2. ebx = stride of the middle loop in bytes; the pushed value is the
; byte offset from the first list to the first list of the chosen interval,
; used again in step 5.
        imul    ebx, ecx, sizeof.ohci_static_ep
        sub     eax, ecx
        sub     eax, ecx
        imul    eax, sizeof.ohci_static_ep
        push    eax
; Step 3. Find the best variant. Start from the largest possible optimum so
; that the first variant always passes the comparison.
        or      [.bandwidth], -1
.next_variant:
; Middle loop setup: running maximum = 0, remember the variant start.
        xor     edi, edi
        push    edx
virtual at esp
.cur_variant    dd      ?       ; pushed at .next_variant
.result_delta   dd      ?       ; pushed in step 2
.group1_limit   dd      ?       ; pushed in the prolog
end virtual
.next_msec:
; Innermost loop: follow the NextList chain and add up the bandwidths.
        xor     eax, eax
        push    edx
.sum_lists:
        add     eax, [edx+ohci_static_ep.SoftwarePart+usb_static_ep.Bandwidth]
        mov     edx, [edx+ohci_static_ep.NextList]
        test    edx, edx
        jnz     .sum_lists
        pop     edx
; Middle loop: edi = max(edi, eax).
        cmp     eax, edi
        jb      .max_kept
        mov     edi, eax
.max_kept:
; Middle loop: step to the next millisecond of this variant while it is
; still inside the first group.
        add     edx, ebx
        cmp     edx, [.group1_limit]
        jb      .next_msec
; Middle loop finished: edx = start of the current variant again.
        pop     edx
; Outer loop: a variant that is not worse than the best one so far replaces it.
        cmp     edi, [.bandwidth]
        ja      .variant_done
        mov     [.bandwidth], edi
        mov     [.target], edx
.variant_done:
; Outer loop: the next variant starts one list further.
        add     edx, sizeof.ohci_static_ep
        dec     ecx
        jnz     .next_variant
; Step 4. Bandwidth required by the new pipe:
; cl = speed flag, ch = nonzero for IN endpoints (bit 7 of the address).
        mov     eax, [.maxpacket]
        mov     cl, [.speed]
        mov     ch, byte [.endpoint]
        and     ch, 80h
        call    calc_usb1_bandwidth
; Step 5. Pointer to the chosen list = offset from step 2 + best variant.
        pop     edx             ; offset pushed in step 2
        pop     ecx             ; drop the group limit pushed in the prolog
        add     edx, [.target]
; Step 6. The new pipe plus the existing load must stay within the maximum
; allowed by the core specification, 90% of 12000 bits.
        mov     ecx, eax
        add     ecx, [.bandwidth]
        cmp     ecx, 10800
        ja      .no_bandwidth
; Step 7. Return the usb_static_ep part of the list and account the new pipe.
        add     edx, ohci_static_ep.SoftwarePart
        add     [edx+usb_static_ep.Bandwidth], eax
.restore_regs:
        pop     edi ebx
        ret
.no_bandwidth:
        dbgstr 'Periodic bandwidth limit reached'
        xor     edx, edx        ; report failure
        jmp     .restore_regs
endp
148
; sanity check, part 2
149
else
150
.err select_interrupt_list must be different for UHCI and OHCI
151
end if
152
 
153
; A pipe is being removed: update the bookkeeping of its list.
; Nothing is reordered; only the bandwidth counter in the list header is
; decreased by the amount the pipe had reserved.
; in: ebx = pipe to start the walk from, esi = value stored in
;     usb_pipe.Controller of real pipes (used to recognize the list header)
; in (stack): maxpacket (dword), lowspeed (byte), direction (byte);
;     the procedure removes these 8 bytes on return
; destroys: eax, ecx, edx
proc usb1_interrupt_list_unlink
virtual at esp
                dd      ?       ; return address
.maxpacket      dd      ?
.lowspeed       db      ?
.direction      db      ?
                rb      2
end virtual
; Bandwidth of the pipe on the bus; a single dword load puts the lowspeed
; flag into cl and the direction flag into ch.
        mov     ecx, dword [.lowspeed]
        mov     eax, [.maxpacket]
        call    calc_usb1_bandwidth
; Follow NextVirt until an element whose Controller field differs from esi
; is reached; that element is treated as the list header.
        mov     edx, ebx
.skip_pipe:
        mov     edx, [edx+usb_pipe.NextVirt]
        cmp     [edx+usb_pipe.Controller], esi
        jz      .skip_pipe
; Give the bandwidth back.
        sub     [edx+usb_static_ep.Bandwidth], eax
        ret     8
endp
178
 
3908 Serge 179
; Helper procedure for USB1 scheduler: calculate bandwidth on the bus.
; in: low 11 bits of eax = payload size in bytes
; in: cl = 0 - full-speed, nonzero - low-speed
; in: ch = 0 - OUT, nonzero - IN
; out: eax = maximal bandwidth in FS-bits
; destroys: ecx, edx
proc calc_usb1_bandwidth
        and     eax, (1 shl 11) - 1     ; get payload for one transaction
        add     eax, 3  ; add 3 bytes for other fields in data packet, PID+CRC16
        test    cl, cl
        jnz     .low_speed
; Full-speed.
; Multiply by 8 for bytes -> bits, by 7/6 to accommodate bit stuffing
; and by 401/400 for IN transfers to accommodate timers difference:
; 9+107/300 for IN transfers, 9+1/3 for OUT transfers.
; The integer part (9) is computed with lea, the fractional part is taken
; from the high dword of a 32x32 multiplication by fraction*2^32:
; For 0 <= eax < 09249355h, floor(eax * 107/300) = floor(eax * 5B4E81B5h / 2^32).
; For 0 <= eax < 80000000h, floor(eax / 3) = floor(eax * 55555556h / 2^32).
        mov     edx, 55555556h
        test    ch, ch
        jz      @f
        mov     edx, 5B4E81B5h
@@:
        lea     ecx, [eax*9]
        mul     edx             ; edx = fractional part of the product
; Add 93 extra bits: 39 bits for Token packet (8 for SYNC, 24 for token+address,
; 4 extra bits for possible bit stuffing in token+address, 3 for EOP),
; 18 bits for bus turn-around, 11 bits for SYNC+EOP in Data packet plus 1 bit
; for possible timers difference, 2 bits for inter-packet delay, 20 bits for
; Handshake packet, 2 bits for another inter-packet delay.
        lea     eax, [ecx+edx+93]
        ret
.low_speed:
; Multiply by 8 for bytes -> bits, by 7/6 to accommodate bit stuffing,
; by 8 for LS -> FS and by 406/50 for IN transfers to accommodate timers difference.
; 75+59/75 for IN transfers, 74+2/3 for OUT transfers.
; As above, the fractional part is multiplied as fraction*2^32:
; 2/3 -> 0AAAAAAABh (eight hex digits; the value 0AAAAAABh would be about 1/24
; and would under-estimate the OUT bandwidth), 59/75 -> 0C962FC97h.
        mov     edx, 0AAAAAAABh
        test    ch, ch
        mov     ecx, 74         ; mov keeps the flags set by test
        jz      @f
        mov     edx, 0C962FC97h
        inc     ecx
@@:
        imul    ecx, eax
        mul     edx
; Add 778 extra bits:
; 16 bits for PRE packet, 4 bits for hub delay, 8*39 bits for Token packet
; 8*18 bits for bus turn-around
; (406/50)*11 bits for SYNC+EOP in Data packet,
; 8*2 bits for inter-packet delay,
; 16 bits for PRE packet, 4 bits for hub delay, 8*20 bits for Handshake packet,
; 8*2 bits for another inter-packet delay.
        lea     eax, [ecx+edx+778]
        ret
endp
231
 
3555 Serge 232
; USB2 scheduler.
233
; There are two parts: high-speed pipes and split-transaction pipes.
3908 Serge 234
;
3555 Serge 235
; High-speed scheduler uses the same algorithm as USB1 scheduler:
236
; when adding a pipe, optimize the following quantity:
237
;  * for every microframe, take all bandwidth scheduled to periodic transfers,
3908 Serge 238
;  * calculate maximum over all microframes,
3555 Serge 239
;  * select a variant which minimizes that maximum;
3908 Serge 240
;  * if there are several such variants,
241
;    prefer those that are closer to end of frame
242
;    to minimize collisions with split transactions;
3555 Serge 243
; when removing a pipe, do nothing (except for bookkeeping).
244
; Select a periodic list and a set of microframes for a new high-speed
; interrupt pipe.
; in: esi -> usb_controller
; in: ebp-based frame inherited from usb_open_pipe (see the virtual block)
; out: edx -> usb_static_ep, eax = S-Mask; both are zero if the bandwidth
;      limit would be exceeded
; ebx and edi are preserved; ecx is overwritten.
proc ehci_select_hs_interrupt_list
; Variables of usb_open_pipe, addressed through its ebp frame.
virtual at ebp-12
.targetsmask    dd      ?
.bandwidth      dd      ?
.target         dd      ?
                dd      ?
                dd      ?
.config_pipe    dd      ?
.endpoint       dd      ?
.maxpacket      dd      ?
.type           dd      ?
.interval       dd      ?
end virtual
; Prolog: every "best so far" value starts as 0FFFFFFFFh.
        or      [.bandwidth], -1
        or      [.target], -1
        or      [.targetsmask], -1
        push    ebx edi         ; callee-saved registers used below
; Step 1. In EHCI one list describes one millisecond = 8 microframes, so
; there are two very different cases:
; * interval >= 8 microframes: continue with step 2,
; * the pipe runs in every frame (in one or more microframes): .every_frame.
; The actual interval of a high-speed device is 2^([.interval]-1) microframes
; (the core specification forbids [.interval] == 0).
        mov     ecx, [.interval]
        dec     ecx
        cmp     ecx, 3
        jb      .every_frame
; Step 2. ecx = log2 of the interval in milliseconds, capped at 5 (32 ms).
        sub     ecx, 3
        cmp     ecx, 5
        jbe     .interval_ok
        movi    ecx, 5
.interval_ok:
; Four nested loops follow:
; * loop #4 (innermost) sums the periodic bandwidth already scheduled in one
;   microframe of one millisecond,
; * loop #3 takes the maximum over all milliseconds of one variant,
; * loops #1 and #2 enumerate the variants: #1 the target millisecond,
;   #2 the microframe inside it.
; Step 3. Prepare the loops:
; ebx = number of iterations of loop #1,
; [esp] = stride of loop #3 in bytes,
; [esp+4] = byte offset from the first group to the target group.
        movi    ebx, 1
        movi    edx, sizeof.ehci_static_ep
        shl     ebx, cl
        shl     edx, cl
        mov     eax, 64*sizeof.ehci_static_ep
        sub     eax, edx
        sub     eax, edx
        push    eax
        push    edx
; Step 4. Loop #1 starts at the first ehci_static_ep of the first group.
        lea     edx, [esi+ehci_controller.IntEDs-sizeof.ehci_controller]
.next_frame:
; Loop #2: ecx = microframe inside the target millisecond.
        xor     ecx, ecx
.next_uframe:
; Loop #3 setup: maximum = 0, remember the loop #1 counter.
        xor     edi, edi
        push    edx
virtual at esp
.saved_counter1         dd      ?       ; pushed at .next_uframe
.loop3_delta            dd      ?       ; pushed in step 3
.target_delta           dd      ?       ; pushed in step 3
end virtual
.scan_frames:
; Loop #4: follow the NextList chain, adding the bandwidth of microframe ecx.
        xor     eax, eax
        push    edx
.sum_lists:
        add     ax, [edx+ehci_static_ep.Bandwidths+ecx*2]
        mov     edx, [edx+ehci_static_ep.NextList]
        test    edx, edx
        jnz     .sum_lists
        pop     edx
; Loop #3: edi = max(edi, eax).
        cmp     eax, edi
        jb      .max_kept
        mov     edi, eax
.max_kept:
; Loop #3: advance by the stride while still inside the first group.
        lea     eax, [esi+ehci_controller.IntEDs+32*sizeof.ehci_static_ep-sizeof.ehci_controller]
        add     edx, [.loop3_delta]
        cmp     edx, eax
        jb      .scan_frames
; Loop #3 finished: edx = loop #1 counter again.
        pop     edx
; Loop #2: compare with the best variant so far. A smaller maximum always
; wins; with an equal maximum the variant wins unless its microframe number
; is below the stored one, so later microframes are preferred.
        cmp     edi, [.bandwidth]
        ja      .variant_done
        jb      .take_variant
        cmp     ecx, [.targetsmask]
        jb      .variant_done
.take_variant:
        mov     [.bandwidth], edi
        mov     [.target], edx
        mov     [.targetsmask], ecx     ; microframe number for now, not a mask
.variant_done:
; Loop #2: all 8 microframes.
        inc     ecx
        cmp     ecx, 8
        jb      .next_uframe
; Loop #1: next millisecond, ebx iterations in total.
        add     edx, sizeof.ehci_static_ep
        dec     ebx
        jnz     .next_frame
; Step 5. Bandwidth of the new pipe: one transaction times the number of
; transactions taken from bits 11 and up of [.maxpacket] ((bits+1) and 3).
        mov     eax, [.maxpacket]
        call    calc_hs_bandwidth
        mov     ecx, [.maxpacket]
        shr     ecx, 11
        inc     ecx
        and     ecx, 3
        imul    eax, ecx
; Step 6. Pointer to the selected list.
        pop     edx             ; discard the loop #3 stride from step 3
        pop     edx             ; offset to the target group from step 3
        add     edx, [.target]
; Step 7. Current [.bandwidth] + new bandwidth must not exceed the limit;
; the USB2 specification allows at most 60000*80% bit times of periodic
; traffic per microframe.
        mov     ecx, [.bandwidth]
        add     ecx, eax
        cmp     ecx, 48000
        ja      .no_bandwidth
; Step 8. Account the new pipe in its microframe, return the usb_static_ep
; part of the list and S-Mask = 1 shl microframe.
        mov     ecx, [.targetsmask]
        add     [edx+ehci_static_ep.Bandwidths+ecx*2], ax
        add     edx, ehci_static_ep.SoftwarePart
        movi    eax, 1
        shl     eax, cl
        pop     edi ebx
        ret
.no_bandwidth:
        dbgstr 'Periodic bandwidth limit reached'
        xor     eax, eax
        xor     edx, edx
        pop     edi ebx
        ret
.every_frame:
; The pipe has to run in every frame, in two or more microframes.
; ecx still holds [.interval]-1 (0, 1 or 2); step 9 does not touch it.
; Step 9. For every microframe find the maximal bandwidth over all 32 frames
; of the first group. Three nested loops; ebx = microframe number.
        xor     ebx, ebx
.uframe_totals:
; Middle loop: edx -> ehci_static_ep in the first group,
; [esp] = frames left, edi = maximum.
        lea     edx, [esi+ehci_controller.IntEDs-sizeof.ehci_controller]
        xor     edi, edi
        push    32
.frame_totals:
; Innermost loop: bandwidth of microframe ebx in this frame.
        xor     eax, eax
        push    edx
.sum_lists2:
        add     ax, [edx+ehci_static_ep.Bandwidths+ebx*2]
        mov     edx, [edx+ehci_static_ep.NextList]
        test    edx, edx
        jnz     .sum_lists2
        pop     edx
; Middle loop: edi = max(edi, eax), then the next frame.
        cmp     eax, edi
        jb      .max_kept2
        mov     edi, eax
.max_kept2:
        add     edx, sizeof.ehci_static_ep
        dec     dword [esp]
        jnz     .frame_totals
        pop     eax             ; drop the exhausted frame counter
; Outer loop: keep the maximum of this microframe on the stack.
        push    edi
        inc     ebx
        cmp     ebx, 8
        jb      .uframe_totals
; The maxima were pushed for microframes 0..7, so the last one is on top.
virtual at esp
.bandwidth7     dd      ?
.bandwidth6     dd      ?
.bandwidth5     dd      ?
.bandwidth4     dd      ?
.bandwidth3     dd      ?
.bandwidth2     dd      ?
.bandwidth1     dd      ?
.bandwidth0     dd      ?
end virtual
; Step 10. Choose the S-Mask (edx = bitmask of scheduled microframes).
; The initial pattern depends on ecx: 2 -> 11h, 1 -> 55h, 0 -> 0FFh.
; mov does not change flags, so jz still tests the result of cmp.
        movi    edx, 0x11
        cmp     ecx, 1
        ja      .smask_ready
        mov     dl, 0x55
        jz      .smask_ready
        mov     dl, 0xFF
.smask_ready:
; Try edx, edx shl 1, edx shl 2, ... while the pattern still fits in the
; low byte (8 microframes per frame).
.try_smask:
; edi = maximum of the stored bandwidths over the microframes set in edx;
; bits are shifted out from bit 7 down, matching the stack layout above.
        xor     edi, edi
        mov     ecx, edx
        mov     eax, esp
.fold_uframe:
        add     cl, cl
        jnc     .fold_next
        cmp     edi, [eax]
        jae     .fold_next
        mov     edi, [eax]
.fold_next:
        add     eax, 4
        test    cl, cl
        jnz     .fold_uframe
; A pattern that is not worse than the best one so far replaces it.
        cmp     [.bandwidth], edi
        jb      .smask_done
        mov     [.bandwidth], edi
        mov     [.targetsmask], edx
.smask_done:
        add     dl, dl
        jnc     .try_smask
; Step 11. Drop the eight maxima pushed in step 9.
        add     esp, 8*4
; Step 12. Target list = the list responsible for every microframe.
        lea     edx, [esi+ehci_controller.IntEDs.SoftwarePart+62*sizeof.ehci_static_ep-sizeof.ehci_controller]
; Step 13. Bandwidth of the new pipe, computed as in step 5.
        mov     eax, [.maxpacket]
        call    calc_hs_bandwidth
        mov     ecx, [.maxpacket]
        shr     ecx, 11
        inc     ecx
        and     ecx, 3
        imul    eax, ecx
; Step 14. Current [.bandwidth] + new bandwidth must not exceed the limit;
; the USB2 specification allows at most 60000*80% bit times of periodic
; traffic per microframe.
        mov     ecx, [.bandwidth]
        add     ecx, eax
        cmp     ecx, 48000
        ja      .no_bandwidth
; Step 15. Add the new pipe to every microframe selected by the S-Mask.
        mov     ecx, [.targetsmask]
        lea     edi, [edx+ehci_static_ep.Bandwidths-ehci_static_ep.SoftwarePart]
.add_bandwidth:
        shr     ecx, 1
        jnc     .add_next
        add     [edi], ax
.add_next:
        add     edi, 2
        test    ecx, ecx
        jnz     .add_bandwidth
; Step 16. Return the target list (edx) and the S-Mask (eax).
        mov     eax, [.targetsmask]
        pop     edi ebx
        ret
endp
515
 
516
; A high-speed pipe is being removed: update the bookkeeping of its list.
; Nothing is reordered; the per-microframe bandwidth counters of the list
; header are decreased by the amount the pipe had reserved.
; in: ebx = pipe; the EHCI-specific fields are addressed as
;     ehci_pipe.<field>-sizeof.ehci_pipe relative to ebx
; destroys: eax, ecx, edx
proc ehci_hs_interrupt_list_unlink
; Bandwidth of one transaction; the packet size is taken from the upper
; word of Token (calc_hs_bandwidth keeps only its low 11 bits).
        movzx   eax, word [ebx+ehci_pipe.Token-sizeof.ehci_pipe+2]
        call    calc_hs_bandwidth
; Target list, stored in the pipe.
        mov     edx, [ebx+ehci_pipe.BaseList-sizeof.ehci_pipe]
; Multiply by the value kept in the two top bits of Flags.
        mov     ecx, [ebx+ehci_pipe.Flags-sizeof.ehci_pipe]
        shr     ecx, 30
        imul    eax, ecx
; Walk the low byte of Flags as a bitmask of microframes (bit 0 first) and
; subtract the bandwidth from the counter of every microframe that is set.
        movzx   ecx, byte [ebx+ehci_pipe.Flags-sizeof.ehci_pipe]
.next_uframe:
        shr     ecx, 1
        jnc     .skip_uframe
        sub     word [edx+ehci_static_ep.Bandwidths - ehci_static_ep.SoftwarePart], ax
.skip_uframe:
        add     edx, 2
        test    ecx, ecx
        jnz     .next_uframe
        ret
endp
541
 
3908 Serge 542
; Helper procedure for USB2 scheduler: calculate bandwidth on the bus.
; in: low 11 bits of eax = payload size in bytes
; out: eax = maximal bandwidth in HS-bits
; destroys: ecx, edx
proc calc_hs_bandwidth
; Payload of one transaction plus 3 bytes for the other fields of the data
; packet (PID + CRC16).
        and     eax, (1 shl 11) - 1
        add     eax, 3
; Bytes -> bits is a factor of 8, bit stuffing adds a factor of 7/6;
; together 28/3 = 9+1/3. The integer part is 9*eax, the fractional part is
; the high dword of eax * 55555556h.
        lea     ecx, [eax+eax*8]
        mov     edx, 55555556h
        mul     edx
; Fixed overhead of 989 bits: 68 bits for Token packet (32 for SYNC, 24 for
; token+address, 4 extra bits for possible bit stuffing in token+address,
; 8 for EOP), 736 bits for bus turn-around, 40 bits for SYNC+EOP in Data
; packet, 8 bits for inter-packet delay, 49 bits for Handshake packet,
; 88 bits for another inter-packet delay.
        lea     eax, [ecx+edx+989]
        ret
endp
3555 Serge 561
 
3908 Serge 562
; Split-transaction scheduler (aka TT scheduler, TT stands for Transaction
563
; Translator, section 11.14 of the core spec) needs to schedule three event
564
; types on two buses: Start-Split and Complete-Split on HS bus and normal
565
; transaction on FS/LS bus.
566
; Assume that FS/LS bus is more restricted and more important to be scheduled
567
; uniformly, so select the variant which minimizes maximal used bandwidth
568
; on FS/LS bus and does not overflow HS bus.
569
; If there are several such variants, prefer the variant which is closest to
570
; start of frame, and within the same microframe consider HS bandwidth
571
; utilization as the last criterion.
572
 
573
; The procedure ehci_select_tt_interrupt_list has been split into several
574
; macros, each representing a logical step of the procedure,
575
; to simplify understanding what is going on. Consider all the following macros
576
; as logical parts of one procedure, they are meaningless outside the context.
577
 
578
; For one frame, collect the bandwidth taken by the already opened pipes in
; every microframe, on both buses: 8 words for the HS bus ([.hs_bandwidth])
; and 8 words for the FS/LS bus ([.budget]), plus their sum [.total_budget].
; Only already opened pipes are counted, so the total in every microframe is
; below 60000 bits (and even 60000*80% bits); otherwise the scheduler would
; not have allowed those pipes to be opened.
; in: edi -> first list for the frame
; uses .budget, .hs_bandwidth, .total_budget and .tthub of the enclosing
; procedure; destroys eax, ebx, ecx, edx
macro tt_calc_bandwidth_in_frame
{
local .lists, .pipes, .next_pipe, .pipes_done, .carry, .fits
; Step 1. Clear both 8-word arrays and the total.
        mov     edx, edi
        xor     eax, eax
        mov     dword [.budget+0], eax
        mov     dword [.budget+4], eax
        mov     dword [.budget+8], eax
        mov     dword [.budget+12], eax
        mov     dword [.hs_bandwidth+0], eax
        mov     dword [.hs_bandwidth+4], eax
        mov     dword [.hs_bandwidth+8], eax
        mov     dword [.hs_bandwidth+12], eax
        mov     [.total_budget], ax
; Outer loop: every list that belongs to the frame.
.lists:
; Step 2. The list header holds the total HS bandwidth of all its pipes;
; add it. Overflow is impossible, so whole dwords (two words at once)
; may be added.
        mov     ebx, [edx+ehci_static_ep.SoftwarePart+usb_static_ep.NextVirt]
repeat 4
        mov     eax, dword [edx+ehci_static_ep.Bandwidths+(%-1)*4]
        add     dword [.hs_bandwidth+(%-1)*4], eax
end repeat
; Inner loop: every pipe of the list; the walk ends when NextVirt leads
; back to the list header itself.
        add     edx, ehci_static_ep.SoftwarePart
.pipes:
        cmp     ebx, edx
        jz      .pipes_done
; Step 3a. Pipes that are not on the same FS/LS bus as the new pipe
; (different TT hub) are skipped.
        mov     eax, [ebx+usb_pipe.DeviceData]
        mov     eax, [eax+usb_device_data.TTHub]
        cmp     eax, [.tthub]
        jnz     .next_pipe
; Step 3b. FS/LS budget of this pipe; eax = TTHub on entry to the helper.
        call    tt_calc_budget
; Step 3c. Charge it to the first microframe scheduled for the pipe
; (lowest set bit of Flags).
        bsf     ecx, [ebx+ehci_pipe.Flags-sizeof.ehci_pipe]
        add     [.budget+ecx*2], ax
.next_pipe:
        mov     ebx, [ebx+usb_pipe.NextVirt]
        jmp     .pipes
.pipes_done:
        mov     edx, [edx+ehci_static_ep.NextList-ehci_static_ep.SoftwarePart]
        test    edx, edx
        jnz     .lists
; Step 4. Whatever exceeds the budget of a microframe is carried into the
; following microframe(s). One microframe is really 187.5 raw bytes; the
; core spec says that 188 bytes should be scheduled in every microframe.
; ax = carry from the previous microframe, ecx = microframe number.
        xor     eax, eax
        xor     ecx, ecx
.carry:
        xor     edx, edx
        add     ax, [.budget+ecx*2]
        cmp     ax, 188
        jbe     .fits
        mov     dx, ax
        sub     dx, 188         ; dx = excess for the next microframe
        mov     ax, 188
.fits:
        mov     [.budget+ecx*2], ax
        add     [.total_budget], ax
        mov     ax, dx
        inc     ecx
        cmp     ecx, 8
        jb      .carry
}
655
 
656
; Checks whether the new pipe fits in the existing FS budget
; starting from the given microframe. If not, mark the microframe
; as impossible for scheduling by clearing its bit in [.possible_microframes].
; in: ecx = microframe
; uses .budget, .new_budget and .possible_microframes of the enclosing
; procedure; destroys eax, edx; ecx is preserved
macro tt_exclude_microframe_if_no_budget
{
local .loop, .good, .bad
; 1. If the new budget plus the current budget does not exceed 188 bytes,
; the variant is possible.
        mov     ax, [.budget+ecx*2]
        mov     edx, ecx
        add     ax, [.new_budget]
        sub     ax, 188
        jbe     .good
; 2. Otherwise ax = number of bytes that spill into the following
; microframes, and
; a) nothing should be scheduled in some following microframes,
; b) after adding the new budget everything should fit in first 6 microframes,
;    this guarantees that even in the worst case 90% limit is satisfied.
.loop:
        cmp     edx, 5
        jae     .bad            ; would spill past microframe 5
        cmp     [.budget+(edx+1)*2], 0
        jnz     .bad            ; the next microframe is already in use
        inc     edx
        sub     ax, 188
        ja      .loop           ; still more than one microframe of data left
; The remainder fits into the (empty) microframe edx: the variant is possible.
; Without this jump the code would fall into .bad and reject every variant
; that needs more than one microframe.
        jmp     .good
.bad:
        btr     [.possible_microframes], ecx
.good:
}
686
 
687
; Collect the data that describe one scheduling variant for the new pipe.
; The data reflect the current scheduling state over all frames touched by
; the variant: maximal HS bandwidth per microframe ([.max_hs_bandwidth]),
; maximal total FS/LS budget ([.max_fs_bandwidth]) and the set of microframes
; that fit in the current FS/LS budget of every frame
; ([.possible_microframes]).
; in: [.variant] -> first list of the variant, [.variant_delta] = step in
;     bytes, esi -> usb_controller
; destroys eax, ebx, ecx, edx, edi
macro tt_calc_statistics_for_one_variant
{
local .frames, .microframes, .fs_kept, .hs_kept
; Step 1. Start with zero maxima; only the first 6 microframes (mask 3Fh)
; are candidates for scheduling.
        xor     eax, eax
        mov     dword [.max_hs_bandwidth+0], eax
        mov     dword [.max_hs_bandwidth+4], eax
        mov     dword [.max_hs_bandwidth+8], eax
        mov     dword [.max_hs_bandwidth+12], eax
        mov     [.max_fs_bandwidth], ax
        mov     [.possible_microframes], 0x3F
; Walk over the frames: start at [.variant], advance by [.variant_delta].
        mov     edi, [.variant]
.frames:
; Step 2. Per-microframe statistics of this frame.
        tt_calc_bandwidth_in_frame
; Step 3. [.max_fs_bandwidth] = max([.max_fs_bandwidth], [.total_budget]).
        mov     ax, [.total_budget]
        cmp     ax, [.max_fs_bandwidth]
        jb      .fs_kept
        mov     [.max_fs_bandwidth], ax
.fs_kept:
; Step 4. For every microframe update the maximal HS bandwidth and check
; whether the microframe may still be used for scheduling.
        xor     ecx, ecx
.microframes:
        mov     ax, [.hs_bandwidth+ecx*2]
        cmp     ax, [.max_hs_bandwidth+ecx*2]
        jb      .hs_kept
        mov     [.max_hs_bandwidth+ecx*2], ax
.hs_kept:
        tt_exclude_microframe_if_no_budget
        inc     ecx
        cmp     ecx, 8
        jb      .microframes
; Next frame; stop once outside of the first descriptor group.
        lea     eax, [esi+ehci_controller.IntEDs+32*sizeof.ehci_static_ep-sizeof.ehci_controller]
        add     edi, [.variant_delta]
        cmp     edi, eax
        jb      .frames
}
732
 
733
; Description of the split transactions needed by one FS/LS pipe behind a
; transaction translator, as consumed by tt_check_variant_microframe.
struct usb_split_info
734
microframe_mask         dd      ?       ; lower byte is S-mask, second byte is C-mask; bits are tested relative to the candidate start microframe
735
ssplit_bandwidth        dd      ?       ; added to the HS bandwidth of every start-split microframe
736
csplit_bandwidth        dd      ?       ; added to the HS bandwidth of every complete-split microframe
737
ends
738
 
739
; Check whether the current variant and the current microframe are allowed
; for scheduling. If so, check whether they are better than the previously
; selected variant+microframe, if any. If so, update the previously selected
; variant+microframe to current ones.
; in: ecx = microframe, [.variant] = variant
; uses .possible_microframes, .max_hs_bandwidth, .max_fs_bandwidth, .info,
; .best_fs_bandwidth, .bandwidth, .targetsmask and .target of the enclosing
; procedure; destroys eax, ebx, edx; ecx is preserved
macro tt_check_variant_microframe
{
local .nothing, .update, .ssplit, .csplit, .csplit_done
; 1. If the current microframe does not fit in existing FS budget, do nothing.
        bt      [.possible_microframes], ecx
        jnc     .nothing
; 2. Calculate maximal HS bandwidth over all affected microframes;
; ebx accumulates the maximum.
; 2a. Start-split phase: one or more microframes starting with ecx,
; coded in lower byte of .info.microframe_mask.
; edx = bit number inside the mask, ecx+edx = absolute microframe.
        xor     ebx, ebx
        xor     edx, edx
.ssplit:
        lea     eax, [ecx+edx]
        movzx   eax, [.max_hs_bandwidth+eax*2]
        add     eax, [.info.ssplit_bandwidth]
        cmp     ebx, eax
        ja      @f
        mov     ebx, eax
@@:
        inc     edx
        bt      [.info.microframe_mask], edx
        jc      .ssplit
; 2b. Complete-split phase: zero or more microframes starting with
; ecx+(last start-split microframe)+2,
; coded in second byte of .info.microframe_mask.
; From here edx is a bit number in the second byte, i.e. it is biased by 8,
; and the absolute microframe is ecx+edx-8.
        add     edx, 8
.csplit:
        inc     edx
        bt      [.info.microframe_mask], edx
        jnc     .csplit_done
        lea     eax, [ecx+edx]
; Stop when the complete-split would leave the frame: microframe eax-8 must
; be below 8, so the biased value is compared with 16. (A limit of 8 could
; never be satisfied here because edx is already at least 10, and the
; complete-split bandwidth would never be taken into account.)
        cmp     eax, 16
        jae     .csplit_done
        movzx   eax, [.max_hs_bandwidth+(eax-8)*2]
        add     eax, [.info.csplit_bandwidth]
        cmp     ebx, eax
        ja      .csplit
        mov     ebx, eax
        jmp     .csplit
.csplit_done:
; 3. Check that current HS bandwidth + new bandwidth <= limit;
; USB2 specification allows maximum 60000*80% bit times for periodic microframe.
        cmp     ebx, 48000
        ja      .nothing
; 4. This variant is possible for scheduling.
; Check whether it is better than the currently selected one.
; 4a. The primary criterion: FS/LS bandwidth.
        mov     ax, [.max_fs_bandwidth]
        cmp     ax, [.best_fs_bandwidth]
        ja      .nothing
        jb      .update
; 4b. The secondary criterion: prefer microframes which are closer to start of frame.
        cmp     ecx, [.targetsmask]
        ja      .nothing
        jb      .update
; 4c. The last criterion: HS bandwidth.
        cmp     ebx, [.bandwidth]
        ja      .nothing
.update:
; 5. This variant is better than the previously selected.
; Update the best variant with current data
; (ax still holds [.max_fs_bandwidth] loaded in 4a).
        mov     [.best_fs_bandwidth], ax
        mov     [.bandwidth], ebx
        mov     [.targetsmask], ecx
        mov     eax, [.variant]
        mov     [.target], eax
.nothing:
}
812
 
813
; TT scheduler: add new pipe.
; in: esi -> usb_controller, edi -> usb_pipe
; in: ebp -> inherited stack frame with the layout described below
; out: edx -> usb_static_ep,
;      eax = S-Mask in the lower byte and C-Mask in the second byte
; out: eax = edx = 0 if the pipe cannot be scheduled
; ebx and edi are preserved, ecx is destroyed
proc ehci_select_tt_interrupt_list
; NOTE(review): this procedure does not reserve stack space for the variables
; between .local_vars_start and .targetsmask; presumably the owner of
; the ebp frame (usb_open_pipe?) reserves at least .local_vars_size bytes
; below .targetsmask - confirm against the caller.
virtual at ebp-12-.local_vars_size
.local_vars_start:
.info                   usb_split_info
.new_budget             dw      ?
.total_budget           dw      ?
.possible_microframes   dd      ?
.tthub                  dd      ?
.budget                 rw      8
.hs_bandwidth           rw      8
.max_hs_bandwidth       rw      8
.max_fs_bandwidth       dw      ?
.best_fs_bandwidth      dw      ?
.variant                dd      ?
.variant_delta          dd      ?
.target_delta           dd      ?
.local_vars_size = $ - .local_vars_start

; the best combination found so far:
.targetsmask    dd      ?       ; microframe for Start-Split, -1 if none yet
.bandwidth      dd      ?       ; maximal HS bandwidth
.target         dd      ?       ; list from the first group
                dd      ?
                dd      ?
.config_pipe    dd      ?
.endpoint       dd      ?
.maxpacket      dd      ?
.type           dd      ?
.interval       dd      ?
end virtual
        push    ebx edi         ; save used registers
; 1. Compute the real interval. FS/LS devices encode the interval as
; number of milliseconds. Use the maximal power of two that is not greater than
; the given interval and EHCI scheduling area = 32 frames.
; Zero interval is handled as 1.
        cmp     [.interval], 1
        adc     [.interval], 0
        mov     ecx, 64
        mov     eax, 64 * sizeof.ehci_static_ep
@@:
        shr     ecx, 1
        cmp     [.interval], ecx
        jb      @b
        mov     [.interval], ecx
; 2. Compute variables for further calculations.
; 2a. [.variant_delta] is delta between two lists from the first group
; that correspond to the same variant.
        imul    ecx, sizeof.ehci_static_ep
        mov     [.variant_delta], ecx
; 2b. [.target_delta] is delta between the final answer from the group
; corresponding to [.interval] and the item from the first group.
        sub     eax, ecx
        sub     eax, ecx
        mov     [.target_delta], eax
; 2c. [.variant] is the first list from the first group that corresponds
; to the current variant.
        lea     eax, [esi+ehci_controller.IntEDs-sizeof.ehci_controller]
        mov     [.variant], eax
; 2d. [.tthub] identifies TT hub for new pipe, [.new_budget] is FS budget
; for new pipe.
        mov     eax, [edi+usb_pipe.DeviceData]
        mov     eax, [eax+usb_device_data.TTHub]
        mov     ebx, edi        ; ebx -> usb_pipe for both helpers below
        mov     [.tthub], eax
        call    tt_calc_budget
        mov     [.new_budget], ax
; 2e. [.info] describes bandwidth used by new pipe on HS bus.
; tt_fill_split_info returns zero if the packet size is not allowed.
        lea     edi, [.info]
        call    tt_fill_split_info
        test    eax, eax
        jz      .no_bandwidth
; 2f. There is no best variant yet, put maximal possible values,
; so any variant would be better than the "current".
        or      [.best_fs_bandwidth], -1
        or      [.target], -1
        or      [.bandwidth], -1
        or      [.targetsmask], -1
; 3. Loop over all variants, for every variant decide whether it is acceptable,
; select the best variant from all acceptable variants.
; [.interval] is used as the counter of remaining variants.
.check_variants:
        tt_calc_statistics_for_one_variant
        xor     ecx, ecx
.check_microframes:
        tt_check_variant_microframe
        inc     ecx
        cmp     ecx, 6
        jb      .check_microframes
        add     [.variant], sizeof.ehci_static_ep
        dec     [.interval]
        jnz     .check_variants
; 4. If there are no acceptable variants, return error.
        mov     ecx, [.targetsmask]
        mov     edx, [.target]
        cmp     ecx, -1
        jz      .no_bandwidth
; 5. Calculate the answer: edx -> selected list, eax = S-Mask and C-Mask.
; The mask is shifted to the selected microframe; Complete-Splits that
; would be shifted out of the frame are dropped.
        mov     eax, [.info.microframe_mask]
        add     edx, [.target_delta]
        shl     eax, cl
        and     eax, 0xFFFF
; 6. Update HS bandwidths in the selected list.
; Bits 0..7 of eax are Start-Splits, bits 8..15 are Complete-Splits.
        xor     ecx, ecx
        mov     ebx, [.info.ssplit_bandwidth]
.update_ssplit:
        bt      eax, ecx
        jnc     @f
        add     [edx+ehci_static_ep.Bandwidths+ecx*2], bx
@@:
        inc     ecx
        cmp     ecx, 8
        jb      .update_ssplit
        mov     ebx, [.info.csplit_bandwidth]
.update_csplit:
        bt      eax, ecx
        jnc     @f
        add     [edx+ehci_static_ep.Bandwidths+(ecx-8)*2], bx
@@:
        inc     ecx
        cmp     ecx, 16
        jb      .update_csplit
; 7. Return.
        add     edx, ehci_static_ep.SoftwarePart
        pop     edi ebx
        ret
.no_bandwidth:
        dbgstr 'Periodic bandwidth limit reached'
        xor     eax, eax
        xor     edx, edx
        pop     edi ebx
        ret
endp
948
 
3908 Serge 949
; A split-transaction pipe is being removed.
; Nothing is rescheduled; the only work is to give back HS bandwidth
; which was accounted for this pipe in the header of its list.
; in: ebx -> usb_pipe
; edi is preserved; eax, ecx, edx are destroyed
proc ehci_fs_interrupt_list_unlink
; 1. Recalculate bandwidths of the pipe in a temporary usb_split_info
; allocated in the stack.
        push    edi
        sub     esp, sizeof.usb_split_info
        mov     edi, esp
        call    tt_fill_split_info
; 2. edx -> software part of the list where the pipe lives.
        mov     edx, [ebx+ehci_pipe.BaseList-sizeof.ehci_pipe]
; 3. Bits 0..7 of ehci_pipe.Flags are tested as the Start-Split mask:
; for every set bit release Start-Split bandwidth in the same microframe.
        xor     ecx, ecx
        mov     eax, [edi+usb_split_info.ssplit_bandwidth]
.release_ssplit:
        bt      [ebx+ehci_pipe.Flags-sizeof.ehci_pipe], ecx
        jnc     .ssplit_next
        sub     word [edx+ehci_static_ep.Bandwidths-ehci_static_ep.SoftwarePart+ecx*2], ax
.ssplit_next:
        inc     ecx
        cmp     ecx, 8
        jb      .release_ssplit
; 4. Bits 8..15 are tested as the Complete-Split mask:
; bit ecx corresponds to the microframe ecx-8.
        mov     eax, [edi+usb_split_info.csplit_bandwidth]
.release_csplit:
        bt      [ebx+ehci_pipe.Flags-sizeof.ehci_pipe], ecx
        jnc     .csplit_next
        sub     word [edx+ehci_static_ep.Bandwidths-ehci_static_ep.SoftwarePart+(ecx-8)*2], ax
.csplit_next:
        inc     ecx
        cmp     ecx, 16
        jb      .release_csplit
; 5. Free the temporary structure and return.
        add     esp, sizeof.usb_split_info
        pop     edi
        ret
endp
3908 Serge 985
 
986
; Helper procedure for ehci_select_tt_interrupt_list.
; Calculates "best-case budget" in terms of the core spec:
; the number of bytes (not bits) for the "optimistic" transaction time.
; Inter-packet delays and bus turn-around time are included,
; bit stuffing and timers drift are not.
; There is one TT-specific addition: TT think time from the hub descriptor.
; Similar to calc_usb1_bandwidth with corresponding changes.
; in: eax -> usb_hub with TT, ebx -> usb_pipe
; out: eax = budget in FS-bytes
; destroys ecx
proc tt_calc_budget
; 1. ecx = two-bit field at bits 5-6 of hub characteristics;
; TT think time in FS-bytes is 1+ecx.
        movzx   ecx, [eax+usb_hub.HubCharacteristics]
        shr     ecx, 5
        and     ecx, 3
; 2. eax = data length, 11-bit field starting from bit 16 of the token.
        mov     eax, [ebx+ehci_pipe.Token-sizeof.ehci_pipe]
        shr     eax, 16
        and     eax, 0x7FF
; 3. Bit 12 of the token selects the low-speed formula.
        bt      [ebx+ehci_pipe.Token-sizeof.ehci_pipe], 12
        jc      .ls_device
; 4a. Full-speed interrupt IN/OUT:
; 33 bits for Token packet (8 for SYNC, 24 for token+address, 3 for EOP),
; 18 bits for bus turn-around, 11 bits for SYNC+EOP in Data packet,
; 2 bits for inter-packet delay, 19 bits for Handshake packet,
; 2 bits for another inter-packet delay. 85 bits total, pad to 11 bytes.
; 12 = those 11 bytes + 1 byte of minimal TT think time in addition to ecx.
        lea     eax, [eax+ecx+12]
        ret
.ls_device:
; 4b. Low-speed interrupt IN/OUT:
; multiply by 8 for LS -> FS,
; add 85 bytes as in full-speed interrupt and extra 5 bytes for two PRE packets
; and two hub delays.
; 91 = those 90 bytes + 1 byte of minimal TT think time in addition to ecx.
        lea     eax, [eax*8+ecx+91]
        ret
endp
1020
 
1021
; Helper procedure for TT scheduler.
; Calculates Start-Split/Complete-Split masks and HS bandwidths.
; in: ebx -> usb_pipe, edi -> usb_split_info
; out: eax = 0 and the structure is not touched if the packet size
;      exceeds the limit for interrupt endpoints,
;      otherwise the structure is filled and eax is nonzero
; destroys ecx, edx
proc tt_fill_split_info
; Interrupt endpoints.
; The core spec says in 5.7.3 "Interrupt Transfer Packet Size Constraints" that:
; The maximum allowable interrupt data payload size is 64 bytes or less for full-speed.
; Low-speed devices are limited to eight bytes or less maximum data payload size.
; The scheduler relies on this: with such sizes any transaction fits
; in two microframes (usually one; two if the transaction has started too late
; in the first microframe), so verify the limit here.
; 1. ecx = limit: 8 if bit 12 of the token is set (low-speed), 64 otherwise.
        mov     eax, [ebx+ehci_pipe.Token-sizeof.ehci_pipe]
        mov     ecx, 8
        bt      eax, 12
        jc      .limit_ready
        mov     ecx, 64
.limit_ready:
; 2. eax = data length; fail if it is above the limit.
        shr     eax, 16
        and     eax, 0x7FF
        cmp     eax, ecx
        ja      .too_large
; 3. Convert the data packet to HS bit times.
; 3 extra bytes are other fields of the data packet, PID+CRC16.
        add     eax, 3
; Multiply by 8 for bytes -> bits and then by 7/6 to accommodate bit stuffing;
; total 28/3 = 9+1/3:
; ecx = 9 * size, edx = high dword of size * 55555556h = size / 3.
        mov     edx, 55555556h
        lea     ecx, [eax+eax*8]
        mul     edx
; 4. One start-split, three complete-splits (unless the last is too far,
; but this is handled by the caller).
        mov     eax, [ebx+usb_pipe.LastTD]
        mov     [edi+usb_split_info.microframe_mask], 0x1C01
; 5. Structure and HS bandwidth of packets depend on the direction,
; taken from bit 8 of the token of the last TD.
        bt      [eax+ehci_gtd.Token-sizeof.ehci_gtd], 8
        jc      .in_transfer
; 5a. Interrupt OUT: data goes with Start-Split.
; Start-Split phase:
; 77 bits for SPLIT packet (32 for SYNC, 8 for EOP, 32 for data, 5 for bit stuffing),
; 88 bits for inter-packet delay, 68 bits for Token packet,
; 88 bits for inter-packet delay, 40 bits for SYNC+EOP in Data packet,
; 88 bits for last inter-packet delay, total 449 bits.
        lea     eax, [ecx+edx+449]
        mov     [edi+usb_split_info.ssplit_bandwidth], eax
; Complete-Split phase:
; 77 bits for SPLIT packet,
; 88 bits for inter-packet delay, 68 bits for Token packet,
; 736 bits for bus turn-around, 49 bits for Handshake packet,
; 8 bits for inter-packet delay, total 1026 bits.
        mov     [edi+usb_split_info.csplit_bandwidth], 1026
        ret
.in_transfer:
; 5b. Interrupt IN: data goes with Complete-Split.
; Start-Split phase:
; 77 bits for SPLIT packet, 88 bits for inter-packet delay,
; 68 bits for Token packet, 88 bits for another inter-packet delay,
; total 321 bits.
        mov     [edi+usb_split_info.ssplit_bandwidth], 321
; Complete-Split phase:
; 77 bits for SPLIT packet, 88 bits for inter-packet delay,
; 68 bits for Token packet, 736 bits for bus turn-around,
; 40 bits for SYNC+EOP in Data packet, 8 bits for inter-packet delay,
; total 1017 bits.
        lea     eax, [ecx+edx+1017]
        mov     [edi+usb_split_info.csplit_bandwidth], eax
        ret
.too_large:
        xor     eax, eax
        ret
endp