; Source: KolibriOS Subversion repository, revision 4418.
; Implementation of periodic transaction scheduler for USB.
; Bandwidth dedicated to periodic transactions is limited, so
; different pipes should be scheduled as uniformly as possible.

; USB2 scheduler.
; There are two parts: high-speed pipes and split-transaction pipes.
;
; High-speed scheduler uses the same algorithm as USB1 scheduler:
; when adding a pipe, optimize the following quantity:
;  * for every microframe, take all bandwidth scheduled to periodic transfers,
;  * calculate maximum over all microframes,
;  * select a variant which minimizes that maximum;
;  * if there are several such variants,
;    prefer those that are closer to end of frame
;    to minimize collisions with split transactions;
; when removing a pipe, do nothing (except for bookkeeping).
; in: esi -> usb_controller
; out: edx -> usb_static_ep, eax = S-Mask
proc ehci_select_hs_interrupt_list
; Selects the periodic list and the microframe(s) for a new high-speed
; interrupt pipe and adds the pipe's bandwidth to the selected list header.
; in: esi -> usb_controller;
;     [.interval] and [.maxpacket] are read from the caller's stack frame
; out: edx -> usb_static_ep, eax = S-Mask;
;      eax = edx = 0 if the periodic bandwidth limit would be exceeded
; ebx and edi are preserved.
; inherit some variables from usb_open_pipe
virtual at ebp-12
.targetsmask    dd      ?
.bandwidth      dd      ?
.target         dd      ?
                dd      ?
                dd      ?
.config_pipe    dd      ?
.endpoint       dd      ?
.maxpacket      dd      ?
.type           dd      ?
.interval       dd      ?
end virtual
; prolog, initialize local vars
        or      [.bandwidth], -1
        or      [.target], -1
        or      [.targetsmask], -1
        push    ebx edi         ; save used registers to be stdcall
; 1. In EHCI, every list describes one millisecond = 8 microframes.
; Thus, there are two significantly different branches:
; for pipes with interval >= 8 microframes, advance to 2,
; for pipes which should be planned in every frame (two or more microframes),
; go to 9.
; Note: the actual interval for high-speed devices is 2^([.interval]-1),
; (the core specification forbids [.interval] == 0)
        mov     ecx, [.interval]
        dec     ecx
        cmp     ecx, 3
        jb      .every_frame
; 2. Determine the actual interval in milliseconds.
        sub     ecx, 3
        cmp     ecx, 5  ; maximum 32ms
        jbe     @f
        movi    ecx, 5
@@:
; There are four nested loops,
; * Loop #4 (the innermost one) calculates the total periodic bandwidth
;   scheduled in the given microframe of the given millisecond.
; * Loop #3 calculates the maximum over all milliseconds
;   in the given variant, that is the quantity we're trying to minimize.
; * Loops #1 and #2 check all variants;
;   loop #1 is responsible for the target millisecond,
;   loop #2 is responsible for the microframe within millisecond.
; 3. Prepare for loops.
; ebx = number of iterations of loop #1
; [esp] = delta of counter for loop #3, in bytes
; [esp+4] = delta between the first group and the target group, in bytes
        movi    ebx, 1
        movi    edx, sizeof.ehci_static_ep
        shl     ebx, cl
        shl     edx, cl
        mov     eax, 64*sizeof.ehci_static_ep
        sub     eax, edx
        sub     eax, edx
        push    eax
        push    edx
; 4. Select the best variant.
; 4a. Loop #1: initialize counter = pointer to ehci_static_ep for
; the target millisecond in the first group.
        lea     edx, [esi+ehci_controller.IntEDs-sizeof.ehci_controller]
.varloop0:
; 4b. Loop #2: initialize counter = microframe within the target millisecond.
        xor     ecx, ecx
.varloop:
; 4c. Loop #3: save counter of loop #1,
; initialize counter with the value of loop #1 counter,
; initialize maximal bandwidth = zero.
        xor     edi, edi
        push    edx
virtual at esp
.saved_counter1         dd      ?       ; step 4c
.loop3_delta            dd      ?       ; step 3
.target_delta           dd      ?       ; step 3
end virtual
.calc_max_bandwidth:
; 4d. Loop #4: initialize counter with the value of loop #3 counter,
; initialize total bandwidth = zero.
        xor     eax, eax
        push    edx
.calc_bandwidth:
; 4e. Loop #4: add the bandwidth from the current list
; and advance to the next list, while there is one.
        add     ax, [edx+ehci_static_ep.Bandwidths+ecx*2]
        mov     edx, [edx+ehci_static_ep.NextList]
        test    edx, edx
        jnz     .calc_bandwidth
; 4f. Loop #4 end: restore counter of loop #3.
        pop     edx
; 4g. Loop #3: update maximal bandwidth.
        cmp     eax, edi
        jb      @f
        mov     edi, eax
@@:
; 4h. Loop #3: advance the counter and repeat while within the first group.
        lea     eax, [esi+ehci_controller.IntEDs+32*sizeof.ehci_static_ep-sizeof.ehci_controller]
        add     edx, [.loop3_delta]
        cmp     edx, eax
        jb      .calc_max_bandwidth
; 4i. Loop #3 end: restore counter of loop #1.
        pop     edx
; 4j. Loop #2: if the current variant is better (maybe not strictly)
; than the previous optimum, update the optimal bandwidth and the target.
; On equal bandwidth the later microframe wins.
        cmp     edi, [.bandwidth]
        ja      @f
        jb      .update
        cmp     ecx, [.targetsmask]
        jb      @f
.update:
        mov     [.bandwidth], edi
        mov     [.target], edx
        mov     [.targetsmask], ecx
@@:
; 4k. Loop #2: continue 8 times for every microframe.
        inc     ecx
        cmp     ecx, 8
        jb      .varloop
; 4l. Loop #1: advance counter and repeat ebx times,
; ebx was calculated in step 3.
        add     edx, sizeof.ehci_static_ep
        dec     ebx
        jnz     .varloop0
; 5. Calculate bandwidth for the new pipe.
; calc_hs_bandwidth destroys ecx and edx; neither is live here.
        mov     eax, [.maxpacket]
        call    calc_hs_bandwidth
        mov     ecx, [.maxpacket]
        shr     ecx, 11
        inc     ecx
        and     ecx, 3
        imul    eax, ecx
; 6. Get the pointer to the best list.
        pop     edx             ; discard loop #3 delta pushed in step 3
        pop     edx             ; get target delta calculated in step 3
        add     edx, [.target]
; 7. Check that bandwidth for the new pipe plus old bandwidth
; still fits to maximum allowed by the core specification
; current [.bandwidth] + new bandwidth <= limit;
; USB2 specification allows maximum 60000*80% bit times for periodic microframe
        mov     ecx, [.bandwidth]
        add     ecx, eax
        cmp     ecx, 48000
        ja      .no_bandwidth
; 8. Convert ehci_static_ep to usb_static_ep, update bandwidth and return.
        mov     ecx, [.targetsmask]
        add     [edx+ehci_static_ep.Bandwidths+ecx*2], ax
        add     edx, ehci_static_ep.SoftwarePart
        movi    eax, 1
        shl     eax, cl
        pop     edi ebx         ; restore used registers to be stdcall
        ret
.no_bandwidth:
        dbgstr 'Periodic bandwidth limit reached'
        xor     eax, eax
        xor     edx, edx
        pop     edi ebx
        ret
.every_frame:
; The pipe should be scheduled every frame in two or more microframes.
; Here ecx = [.interval]-1 = 0, 1 or 2; it stays untouched until step 10.
; 9. Calculate maximal bandwidth for every microframe: three nested loops.
; 9a. The outermost loop: ebx = microframe to calculate.
        xor     ebx, ebx
.calc_all_bandwidths:
; 9b. The intermediate loop:
; edx = pointer to ehci_static_ep in the first group, [esp] = counter,
; edi = maximal bandwidth
        lea     edx, [esi+ehci_controller.IntEDs-sizeof.ehci_controller]
        xor     edi, edi
        push    32
.calc_max_bandwidth2:
; 9c. The innermost loop: calculate bandwidth for the given microframe
; in the given frame.
        xor     eax, eax
        push    edx
.calc_bandwidth2:
        add     ax, [edx+ehci_static_ep.Bandwidths+ebx*2]
        mov     edx, [edx+ehci_static_ep.NextList]
        test    edx, edx
        jnz     .calc_bandwidth2
        pop     edx
; 9d. The intermediate loop continued: update maximal bandwidth.
        cmp     eax, edi
        jb      @f
        mov     edi, eax
@@:
        add     edx, sizeof.ehci_static_ep
        dec     dword [esp]
        jnz     .calc_max_bandwidth2
        pop     eax
; 9e. Push the calculated maximal bandwidth and continue the outermost loop.
        push    edi
        inc     ebx
        cmp     ebx, 8
        jb      .calc_all_bandwidths
virtual at esp
.bandwidth7     dd      ?
.bandwidth6     dd      ?
.bandwidth5     dd      ?
.bandwidth4     dd      ?
.bandwidth3     dd      ?
.bandwidth2     dd      ?
.bandwidth1     dd      ?
.bandwidth0     dd      ?
end virtual
; 10. Select the best variant.
; edx = S-Mask = bitmask of scheduled microframes:
; 0x11 for ecx = 2, 0x55 for ecx = 1, 0xFF for ecx = 0
        movi    edx, 0x11
        cmp     ecx, 1
        ja      @f
        mov     dl, 0x55
        jz      @f
        mov     dl, 0xFF
@@:
; try all variants edx, edx shl 1, edx shl 2, ...
; while they fit in the lower byte (8 microframes per frame)
.select_best_mframe:
        xor     edi, edi
        mov     ecx, edx
        mov     eax, esp
.calc_mframe:
; the top bit of cl corresponds to microframe 7, which is stored at [esp]
        add     cl, cl
        jnc     @f
        cmp     edi, [eax]
        jae     @f
        mov     edi, [eax]
@@:
        add     eax, 4
        test    cl, cl
        jnz     .calc_mframe
        cmp     [.bandwidth], edi
        jb      @f
        mov     [.bandwidth], edi
        mov     [.targetsmask], edx
@@:
        add     dl, dl
        jnc     .select_best_mframe
; 11. Restore stack after step 9.
        add     esp, 8*4
; 12. Calculate bandwidth on the bus.
; This must be done before edx receives the list pointer:
; calc_hs_bandwidth destroys ecx and edx.
        mov     eax, [.maxpacket]
        call    calc_hs_bandwidth
        mov     ecx, [.maxpacket]
        shr     ecx, 11
        inc     ecx
        and     ecx, 3
        imul    eax, ecx
; 13. Get the pointer to the target list (responsible for every microframe).
        lea     edx, [esi+ehci_controller.IntEDs.SoftwarePart+62*sizeof.ehci_static_ep-sizeof.ehci_controller]
; 14. Check that current [.bandwidth] + new bandwidth <= limit;
; USB2 specification allows maximum 60000*80% bit times for periodic microframe.
        mov     ecx, [.bandwidth]
        add     ecx, eax
        cmp     ecx, 48000
        ja      .no_bandwidth
; 15. Update bandwidths including the new pipe.
        mov     ecx, [.targetsmask]
        lea     edi, [edx+ehci_static_ep.Bandwidths-ehci_static_ep.SoftwarePart]
.update_bandwidths:
        shr     ecx, 1
        jnc     @f
        add     [edi], ax
@@:
        add     edi, 2
        test    ecx, ecx
        jnz     .update_bandwidths
; 16. Return target list and target S-Mask.
        mov     eax, [.targetsmask]
        pop     edi ebx         ; restore used registers to be stdcall
        ret
endp

; A pipe is being removed, update the corresponding lists.
; We do not reorder anything, so just update the book-keeping variable
; in the list header.
proc ehci_hs_interrupt_list_unlink
; Subtracts the bandwidth of a high-speed interrupt pipe from the header
; of the list referenced by the pipe's BaseList field.
; in: ebx -> usb_pipe; the ehci_pipe fields are addressed relative to
;     ebx-sizeof.ehci_pipe
; destroys eax, ecx, edx
; Bandwidth of one transaction; the payload size is taken from
; the upper word of Token.
        movzx   eax, word [ebx+ehci_pipe.Token-sizeof.ehci_pipe+2]
        call    calc_hs_bandwidth
; Scale by the value kept in the two top bits of Flags.
        mov     ecx, [ebx+ehci_pipe.Flags-sizeof.ehci_pipe]
        shr     ecx, 30
        imul    eax, ecx
; edx -> target list, ecx = low byte of Flags used as a mask of microframes
; (presumably the S-Mask returned when the pipe was scheduled).
        mov     edx, [ebx+usb_pipe.BaseList]
        movzx   ecx, byte [ebx+ehci_pipe.Flags-sizeof.ehci_pipe]
.next_microframe:
; Take the next bit of the mask; edx advances by one word per microframe,
; so the fixed displacement always addresses the current microframe.
        shr     ecx, 1
        jnc     .not_scheduled
        sub     word [edx+ehci_static_ep.Bandwidths - ehci_static_ep.SoftwarePart], ax
.not_scheduled:
        add     edx, 2
        test    ecx, ecx
        jnz     .next_microframe
        ret
endp

; Helper procedure for USB2 scheduler: calculate bandwidth on the bus.
; in: low 11 bits of eax = payload size in bytes
; out: eax = maximal bandwidth in HS-bits
proc calc_hs_bandwidth
; Converts a payload size into the number of high-speed bit times
; occupied on the bus by one transaction.
; in: low 11 bits of eax = payload size in bytes
; out: eax = bandwidth in HS-bits
; destroys ecx, edx
        and     eax, (1 shl 11) - 1     ; payload of one transaction
        add     eax, 3                  ; PID + CRC16 of the data packet
; Bytes -> bits is a factor of 8, bit stuffing adds a factor of 7/6,
; 28/3 = 9+1/3 in total: ecx receives 9*eax, edx receives the 1/3 part
; as the high dword of eax*55555556h.
        lea     ecx, [eax*9]
        mov     edx, 55555556h
        mul     edx
; Fixed overhead of 989 bits:
; 68 for Token packet (32 SYNC, 24 token+address, 4 for possible
; bit stuffing in token+address, 8 EOP),
; 736 for bus turn-around, 40 for SYNC+EOP in Data packet,
; 8 for inter-packet delay, 49 for Handshake packet,
; 88 for another inter-packet delay.
        lea     eax, [ecx+edx+989]
        ret
endp

; Split-transaction scheduler (aka TT scheduler, TT stands for Transaction
; Translator, section 11.14 of the core spec) needs to schedule three event
; types on two buses: Start-Split and Complete-Split on HS bus and normal
; transaction on FS/LS bus.
; Assume that FS/LS bus is more restricted and more important to be scheduled
; uniformly, so select the variant which minimizes maximal used bandwidth
; on FS/LS bus and does not overflow HS bus.
; If there are several such variants, prefer the variant which is closest to
; start of frame, and within the same microframe consider HS bandwidth
; utilization as the last criterion.

; The procedure ehci_select_tt_interrupt_list has been split into several
; macros, each representing a logical step of the procedure,
; to simplify understanding what is going on. Consider all the following macros
; as logical parts of one procedure, they are meaningless outside the context.

; Given a frame, calculate bandwidth occupied by already opened pipes
; in every microframe.
; Look at both HS and FS/LS buses: there are 16 words of information,
; 8 for HS bus, 8 for FS/LS bus, for every microframe.
; Since we count already opened pipes, the total bandwidth in every microframe
; is less than 60000 bits (and even 60000*80% bits), otherwise the scheduler
; would not have allowed those pipes to be opened.
; edi -> first list for the frame
macro tt_calc_bandwidth_in_frame
{
local .next_list, .next_pipe, .skip_pipe, .list_done, .carry_loop, .fits
; Fills [.hs_bandwidth], [.budget] and [.total_budget] for one frame.
; in: edi -> first list for the frame (preserved)
; destroys eax, ebx, ecx, edx
; 1. Clear the accumulators: 8 words of FS/LS budget, 8 words of HS bandwidth
; and the total budget.
        xor     eax, eax
        mov     edx, edi
repeat 4
        mov     dword [.budget+(%-1)*4], eax
end repeat
repeat 4
        mov     dword [.hs_bandwidth+(%-1)*4], eax
end repeat
        mov     [.total_budget], ax
; Walk through the chain of lists for the frame; edx -> ehci_static_ep.
.next_list:
; 2. The list header keeps total HS bandwidth of all pipes in the list.
; Overflow between words is impossible, so whole dwords are added.
        mov     ebx, [edx+ehci_static_ep.SoftwarePart+usb_static_ep.NextVirt]
repeat 4
        mov     eax, dword [edx+ehci_static_ep.Bandwidths+(%-1)*4]
        add     dword [.hs_bandwidth+(%-1)*4], eax
end repeat
; Walk through the pipes of the list; ebx -> current pipe,
; edx -> software part of the list header, which terminates the walk.
        add     edx, ehci_static_ep.SoftwarePart
.next_pipe:
        cmp     ebx, edx
        jz      .list_done
; 3. For every pipe:
; 3a. Ignore pipes whose TT hub differs from the one of the new pipe.
        mov     eax, [ebx+usb_pipe.DeviceData]
        mov     eax, [eax+usb_device_data.TTHub]
        cmp     eax, [.tthub]
        jnz     .skip_pipe
; 3b. Calculate FS/LS budget of the pipe; eax = TTHub from 3a.
        call    tt_calc_budget
; 3c. Account the budget in the first microframe scheduled for the pipe,
; that is, the lowest set bit of Flags.
        bsf     ecx, [ebx+ehci_pipe.Flags-sizeof.ehci_pipe]
        add     [.budget+ecx*2], ax
.skip_pipe:
        mov     ebx, [ebx+usb_pipe.NextVirt]
        jmp     .next_pipe
.list_done:
        mov     edx, [edx+ehci_static_ep.NextList-ehci_static_ep.SoftwarePart]
        test    edx, edx
        jnz     .next_list
; 4. If the budget of a microframe exceeds 188 bytes, move the excess
; to the following microframe(s). The actual size of one microframe is
; 187.5 raw bytes; the core spec says that 188 bytes should be scheduled
; in every microframe.
; ax = carry from the previous microframe, ecx = microframe.
        xor     eax, eax
        xor     ecx, ecx
.carry_loop:
        xor     edx, edx
        add     ax, [.budget+ecx*2]
        cmp     ax, 188
        jbe     .fits
        mov     dx, ax
        mov     ax, 188
        sub     dx, ax          ; dx = excess carried to the next microframe
.fits:
        mov     [.budget+ecx*2], ax
        add     [.total_budget], ax
        mov     ax, dx
        inc     ecx
        cmp     ecx, 8
        jb      .carry_loop
}

; Check whether the new pipe fits in the existing FS budget
; starting from the given microframe. If not, mark the microframe
; as impossible for scheduling.
; in: ecx = microframe
macro tt_exclude_microframe_if_no_budget
{
local .loop, .good, .bad
; Clears bit ecx of [.possible_microframes] if the new pipe, started in
; microframe ecx, does not fit in the FS/LS budget kept in [.budget].
; in: ecx = microframe (preserved)
; destroys eax, edx
; 1. If the new budget plus the current budget does not exceed 188 bytes,
; the variant is possible.
        mov     ax, [.budget+ecx*2]
        mov     edx, ecx
        add     ax, [.new_budget]
        sub     ax, 188
        jbe     .good
; 2. Otherwise,
; a) nothing should be scheduled in some following microframes,
; b) after adding the new budget everything should fit in first 6 microframes,
;    this guarantees that even in the worst case 90% limit is satisfied.
; ax = number of bytes that still have to be placed, edx = last microframe
; that has been checked.
.loop:
        cmp     edx, 5
        jae     .bad
        cmp     [.budget+(edx+1)*2], 0
        jnz     .bad
        inc     edx
        sub     ax, 188
        ja      .loop
; The excess has been absorbed by empty microframes: the variant is possible.
; Without this jump every variant that needs carry-over would fall through
; to .bad and be rejected.
        jmp     .good
.bad:
        btr     [.possible_microframes], ecx
.good:
}

; Calculate data corresponding to the particular scheduling variant for the new pipe.
; The data describe the current scheduling state collected over all frames touched
; by the given variant: maximal HS bandwidth, maximal FS/LS budget,
; and which microframes fit in the current FS/LS budget for all frames.
macro tt_calc_statistics_for_one_variant
{
local .next_frame, .next_microframe, .fs_max_kept, .hs_max_kept
; Collects [.max_hs_bandwidth], [.max_fs_bandwidth] and
; [.possible_microframes] over all frames of the variant [.variant].
; destroys eax, ebx, ecx, edx, edi
; 1. Start from zero maxima and from the first 6 microframes
; being possible for scheduling.
        xor     eax, eax
repeat 4
        mov     dword [.max_hs_bandwidth+(%-1)*4], eax
end repeat
        mov     [.max_fs_bandwidth], ax
        mov     [.possible_microframes], 0x3F
; edi walks over the frames: starts at [.variant], steps by [.variant_delta].
        mov     edi, [.variant]
.next_frame:
; 2. Get the statistics of the current frame.
        tt_calc_bandwidth_in_frame
; 3. Keep the largest FS budget seen so far.
        mov     ax, [.total_budget]
        cmp     ax, [.max_fs_bandwidth]
        jb      .fs_max_kept
        mov     [.max_fs_bandwidth], ax
.fs_max_kept:
; 4. For every microframe keep the largest HS bandwidth seen so far
; and check whether the microframe is still allowed for scheduling.
        xor     ecx, ecx
.next_microframe:
        mov     ax, [.hs_bandwidth+ecx*2]
        cmp     ax, [.max_hs_bandwidth+ecx*2]
        jb      .hs_max_kept
        mov     [.max_hs_bandwidth+ecx*2], ax
.hs_max_kept:
        tt_exclude_microframe_if_no_budget
        inc     ecx
        cmp     ecx, 8
        jb      .next_microframe
; Advance to the next frame, stop when outside of first descriptor group.
        add     edi, [.variant_delta]
        lea     eax, [esi+ehci_controller.IntEDs+32*sizeof.ehci_static_ep-sizeof.ehci_controller]
        cmp     edi, eax
        jb      .next_frame
}
505
 
506
struct usb_split_info
; bits 0-7 are S-mask, bits 8-15 are C-mask
microframe_mask         dd      ?
; HS bandwidth accounted in every microframe selected by S-mask
ssplit_bandwidth        dd      ?
; HS bandwidth accounted in every microframe selected by C-mask
csplit_bandwidth        dd      ?
ends

; Check whether the current variant and the current microframe are allowed
; for scheduling. If so, check whether they are better than the previously
; selected variant+microframe, if any. If so, replace the previously selected
; variant+microframe with the current ones.
; ecx = microframe, [.variant] = variant
macro tt_check_variant_microframe
{
local .nothing, .update, .ssplit, .csplit, .csplit_done
; Evaluates scheduling of the new pipe in microframe ecx of [.variant];
; if it is allowed and better than the best choice so far, stores it in
; [.best_fs_bandwidth], [.bandwidth], [.targetsmask] and [.target].
; in: ecx = microframe (preserved)
; destroys eax, ebx, edx
; 1. If the current microframe does not fit in existing FS budget, do nothing.
        bt      [.possible_microframes], ecx
        jnc     .nothing
; 2. Calculate maximal HS bandwidth over all affected microframes.
; ebx = maximum, edx = bit number inside .info.microframe_mask.
; 2a. Start-split phase: one or more microframes starting with ecx,
; coded in lower byte of .info.microframe_mask.
        xor     ebx, ebx
        xor     edx, edx
.ssplit:
        lea     eax, [ecx+edx]
        movzx   eax, [.max_hs_bandwidth+eax*2]
        add     eax, [.info.ssplit_bandwidth]
        cmp     ebx, eax
        ja      @f
        mov     ebx, eax
@@:
        inc     edx
        bt      [.info.microframe_mask], edx
        jc      .ssplit
; 2b. Complete-split phase: zero or more microframes starting with
; ecx+(last start-split microframe)+2,
; coded in second byte of .info.microframe_mask.
        add     edx, 8
.csplit:
        inc     edx
        bt      [.info.microframe_mask], edx
        jnc     .csplit_done
; edx is a bit number in the second byte of the mask, so ecx+edx-8 is
; the microframe of this Complete-Split. Only microframes 0..7 belong to
; the current frame, i.e. ecx+edx must be below 16. (Comparing with 8 here
; would always leave the loop, because edx is at least 10.)
        lea     eax, [ecx+edx]
        cmp     eax, 16
        jae     .csplit_done
        movzx   eax, [.max_hs_bandwidth+(eax-8)*2]
        add     eax, [.info.csplit_bandwidth]
        cmp     ebx, eax
        ja      .csplit
        mov     ebx, eax
        jmp     .csplit
.csplit_done:
; 3. Check that current HS bandwidth + new bandwidth <= limit;
; USB2 specification allows maximum 60000*80% bit times for periodic microframe.
        cmp     ebx, 48000
        ja      .nothing
; 4. This variant is possible for scheduling.
; Check whether it is better than the currently selected one.
; 4a. The primary criterion: FS/LS bandwidth.
        mov     ax, [.max_fs_bandwidth]
        cmp     ax, [.best_fs_bandwidth]
        ja      .nothing
        jb      .update
; 4b. The secondary criterion: prefer microframes which are closer
; to start of frame.
        cmp     ecx, [.targetsmask]
        ja      .nothing
        jb      .update
; 4c. The last criterion: HS bandwidth.
        cmp     ebx, [.bandwidth]
        ja      .nothing
.update:
; 5. This variant is better than the previously selected.
; Update the best variant with current data; ax still holds
; [.max_fs_bandwidth] loaded in 4a.
        mov     [.best_fs_bandwidth], ax
        mov     [.bandwidth], ebx
        mov     [.targetsmask], ecx
        mov     eax, [.variant]
        mov     [.target], eax
.nothing:
}

; TT scheduler: add new pipe.
; in: esi -> usb_controller, edi -> usb_pipe
; out: edx -> usb_static_ep, eax = S-Mask
proc ehci_select_tt_interrupt_list
; Selects the periodic list and the microframes for a new split-transaction
; (FS/LS behind a TT) interrupt pipe and adds its Start-Split/Complete-Split
; bandwidth to the selected list header.
; in: esi -> usb_controller, edi -> usb_pipe;
;     [.interval] is read from the caller's stack frame
; out: edx -> usb_static_ep, eax = S-Mask in bits 0-7 and C-Mask in bits 8-15;
;      eax = edx = 0 if no acceptable variant exists
; ebx and edi are preserved.
virtual at ebp-12-.local_vars_size
.local_vars_start:
.info                   usb_split_info
.new_budget             dw      ?
.total_budget           dw      ?
.possible_microframes   dd      ?
.tthub                  dd      ?
.budget                 rw      8
.hs_bandwidth           rw      8
.max_hs_bandwidth       rw      8
.max_fs_bandwidth       dw      ?
.best_fs_bandwidth      dw      ?
.variant                dd      ?
.variant_delta          dd      ?
.target_delta           dd      ?
.local_vars_size = $ - .local_vars_start
if .local_vars_size > 24*4
err Modify stack frame size in
end if

.targetsmask    dd      ?
.bandwidth      dd      ?
.target         dd      ?
                dd      ?
                dd      ?
.config_pipe    dd      ?
.endpoint       dd      ?
.maxpacket      dd      ?
.type           dd      ?
.interval       dd      ?
end virtual
        push    ebx edi
; 1. Compute the real interval. FS/LS devices encode the interval as
; number of milliseconds. Use the maximal power of two that is not greater than
; the given interval and EHCI scheduling area = 32 frames.
; A zero interval is treated as 1.
        cmp     [.interval], 1
        adc     [.interval], 0
        mov     ecx, 64
        mov     eax, 64 * sizeof.ehci_static_ep
@@:
        shr     ecx, 1
        cmp     [.interval], ecx
        jb      @b
        mov     [.interval], ecx
; 2. Compute variables for further calculations.
; 2a. [.variant_delta] is delta between two lists from the first group
; that correspond to the same variant.
        imul    ecx, sizeof.ehci_static_ep
        mov     [.variant_delta], ecx
; 2b. [.target_delta] is delta between the final answer from the group
; corresponding to [.interval] and the item from the first group.
        sub     eax, ecx
        sub     eax, ecx
        mov     [.target_delta], eax
; 2c. [.variant] is the first list from the first group that corresponds
; to the current variant.
        lea     eax, [esi+ehci_controller.IntEDs-sizeof.ehci_controller]
        mov     [.variant], eax
; 2d. [.tthub] identifies TT hub for new pipe, [.new_budget] is FS budget
; for new pipe.
        mov     eax, [edi+usb_pipe.DeviceData]
        mov     eax, [eax+usb_device_data.TTHub]
        mov     ebx, edi
        mov     [.tthub], eax
        call    tt_calc_budget
        mov     [.new_budget], ax
; 2e. [.info] describes bandwidth used by new pipe on HS bus.
        lea     edi, [.info]
        call    tt_fill_split_info
        test    eax, eax
        jz      .no_bandwidth
; 2f. There is no best variant yet, put maximal possible values,
; so any variant would be better than the "current".
        or      [.best_fs_bandwidth], -1
        or      [.target], -1
        or      [.bandwidth], -1
        or      [.targetsmask], -1
; 3. Loop over all variants, for every variant decide whether it is acceptable,
; select the best variant from all acceptable variants.
; [.interval] serves as the counter of remaining variants.
.check_variants:
        tt_calc_statistics_for_one_variant
        xor     ecx, ecx
.check_microframes:
        tt_check_variant_microframe
        inc     ecx
        cmp     ecx, 6
        jb      .check_microframes
        add     [.variant], sizeof.ehci_static_ep
        dec     [.interval]
        jnz     .check_variants
; 4. If there are no acceptable variants, return error.
; [.targetsmask] still holds -1 from step 2f in that case.
        mov     ecx, [.targetsmask]
        mov     edx, [.target]
        cmp     ecx, -1
        jz      .no_bandwidth
; 5. Calculate the answer: edx -> selected list, eax = S-Mask and C-Mask.
        mov     eax, [.info.microframe_mask]
        add     edx, [.target_delta]
        shl     eax, cl
        and     eax, 0xFFFF
; 6. Update HS bandwidths in the selected list.
; Bits 0-7 of eax select microframes of Start-Split,
; bits 8-15 select microframes of Complete-Split.
        xor     ecx, ecx
        mov     ebx, [.info.ssplit_bandwidth]
.update_ssplit:
        bt      eax, ecx
        jnc     @f
        add     [edx+ehci_static_ep.Bandwidths+ecx*2], bx
@@:
        inc     ecx
        cmp     ecx, 8
        jb      .update_ssplit
        mov     ebx, [.info.csplit_bandwidth]
.update_csplit:
        bt      eax, ecx
        jnc     @f
        add     [edx+ehci_static_ep.Bandwidths+(ecx-8)*2], bx
@@:
        inc     ecx
        cmp     ecx, 16
        jb      .update_csplit
; 7. Return.
        add     edx, ehci_static_ep.SoftwarePart
        pop     edi ebx
        ret
.no_bandwidth:
        dbgstr 'Periodic bandwidth limit reached'
        xor     eax, eax
        xor     edx, edx
        pop     edi ebx
        ret
endp

; A pipe is being removed, update the corresponding lists.
; We do not reorder anything, so just update the book-keeping variable
; in the list header.
proc ehci_fs_interrupt_list_unlink
; Subtracts Start-Split and Complete-Split bandwidth of a split-transaction
; pipe from the header of the list referenced by the pipe's BaseList field.
; in: ebx -> usb_pipe; the ehci_pipe fields are addressed relative to
;     ebx-sizeof.ehci_pipe
; edi is preserved; eax, ecx, edx are destroyed.
; Get the bandwidths of the pipe into a temporary usb_split_info on the stack;
; edi is expected to survive the call, it is used to read the result.
        push    edi
        sub     esp, sizeof.usb_split_info
        mov     edi, esp
        call    tt_fill_split_info
; ecx = bit number in Flags, edx -> target list
        xor     ecx, ecx
        mov     edx, [ebx+usb_pipe.BaseList]
; Bits 0-7 of Flags: microframes with Start-Split.
        mov     eax, [edi+usb_split_info.ssplit_bandwidth]
.ssplit_loop:
        bt      [ebx+ehci_pipe.Flags-sizeof.ehci_pipe], ecx
        jnc     .ssplit_skip
        sub     word [edx+ecx*2+ehci_static_ep.Bandwidths - ehci_static_ep.SoftwarePart], ax
.ssplit_skip:
        inc     ecx
        cmp     ecx, 8
        jb      .ssplit_loop
; Bits 8-15 of Flags: microframes with Complete-Split;
; bit ecx corresponds to microframe ecx-8.
        mov     eax, [edi+usb_split_info.csplit_bandwidth]
.csplit_loop:
        bt      [ebx+ehci_pipe.Flags-sizeof.ehci_pipe], ecx
        jnc     .csplit_skip
        sub     word [edx+(ecx-8)*2+ehci_static_ep.Bandwidths - ehci_static_ep.SoftwarePart], ax
.csplit_skip:
        inc     ecx
        cmp     ecx, 16
        jb      .csplit_loop
; Release the temporary structure and return.
        add     esp, sizeof.usb_split_info
        pop     edi
        ret
endp
3826 clevermous 761
 
762
; Helper procedure for ehci_select_tt_interrupt_list.
; Calculates the "best-case budget" in terms of the core spec: the number
; of full-speed bytes (not bits) that corresponds to the "optimistic"
; transaction time. Inter-packet delays and bus turn-around time are included,
; bit stuffing and timers drift are not.
; One TT-specific delay is added on top: TT think time from the hub descriptor.
; Similar to calc_usb1_bandwidth with corresponding changes.
; in: eax -> usb_hub with TT, ebx -> usb_pipe
; out: eax = budget in FS-bytes, ecx = TT think time in FS-bytes
proc tt_calc_budget
        invoke  usbhc_api.usb_get_tt_think_time ; ecx = TT think time in FS-bytes
; Extract the data length, bits 16..26 of the Token field.
        mov     eax, [ebx+ehci_pipe.Token-sizeof.ehci_pipe]
        shr     eax, 16
        and     eax, 7FFh
; Bit 12 of the Token field is set for low-speed endpoints.
        bt      [ebx+ehci_pipe.Token-sizeof.ehci_pipe], 12
        jnc     .full_speed
; Low-speed interrupt IN/OUT:
; every LS bit takes as long as one FS byte, so multiply the data length by 8,
; add 85 bytes of overhead (the same 85 bit times as in the full-speed case)
; and 5 more bytes for two PRE packets and two hub delays, 90 in total.
; NOTE(review): older commentary mentioned 1 extra byte of minimal TT think
; time in addition to ecx; no such term is added here, presumably
; usb_get_tt_think_time already accounts for it - confirm.
        lea     eax, [eax*8+ecx+90]
        ret
.full_speed:
; Full-speed interrupt IN/OUT:
; 33 bits for Token packet (8 for SYNC, 24 for token+address, 3 for EOP),
; 18 bits for bus turn-around, 11 bits for SYNC+EOP in Data packet,
; 2 bits for inter-packet delay, 19 bits for Handshake packet,
; 2 bits for another inter-packet delay. 85 bits total, pad to 11 bytes.
; NOTE(review): the parenthesized Token parts sum to 35 rather than 33,
; which would give 87 bits; the padded result is 11 bytes either way.
        lea     eax, [eax+ecx+11]
        ret
endp
794
 
795
; Helper procedure for TT scheduler.
; Calculates Start-Split/Complete-Split masks and HS bandwidths.
; in: ebx -> usb_pipe, edi -> usb_split_info
; out: eax = 0 if the packet size is out of range, the structure is not filled;
;      otherwise eax is nonzero and the fields microframe_mask,
;      ssplit_bandwidth, csplit_bandwidth of usb_split_info are filled
; destroys ecx, edx, flags
proc tt_fill_split_info
; Interrupt endpoints.
; The core spec says in 5.7.3 "Interrupt Transfer Packet Size Constraints" that:
; The maximum allowable interrupt data payload size is 64 bytes or less for full-speed.
; Low-speed devices are limited to eight bytes or less maximum data payload size.
; This matters for scheduling: it guarantees that a transaction always fits
; in two microframes (usually one; two if the transaction has started too late
; in the first microframe), so verify the limit.
        mov     eax, [ebx+ehci_pipe.Token-sizeof.ehci_pipe]
        mov     ecx, 64         ; full-speed limit
        bt      eax, 12         ; bit 12 is set for low-speed endpoints
        jnc     @f
        mov     ecx, 8          ; low-speed limit
@@:
        shr     eax, 16
        and     eax, 7FFh       ; data length, bits 16..26 of Token
        cmp     eax, ecx
        ja      .error
        add     eax, 3  ; 3 more bytes for other fields in data packet, PID+CRC16
; Multiply by 8 for bytes -> bits and then by 7/6 to accommodate bit stuffing;
; total 28/3 = 9+1/3. The integer part goes to ecx = 9*bytes,
; the fractional part goes to edx = bytes/3, computed as the high dword
; of the product with 55555556h.
        lea     ecx, [eax+eax*8]
        mov     edx, 55555556h
        mul     edx
; One start-split, three complete-splits (unless the last is too far,
; but this is handled by the caller): bit 0 and bits 10..12 of the mask.
        mov     [edi+usb_split_info.microframe_mask], 0x1C01
; Structure and HS bandwidth of packets depend on the direction;
; bit 8 of the Token field in the last TD is set for IN transfers.
        mov     eax, [ebx+usb_pipe.LastTD]
        bt      [eax+ehci_gtd.Token-sizeof.ehci_gtd], 8
        jnc     .interrupt_out
.interrupt_in:
; Complete-Split phase carries the data:
; 77 bits for SPLIT packet, 88 bits for inter-packet delay,
; 68 bits for Token packet, 736 bits for bus turn-around,
; 40 bits for SYNC+EOP in Data packet, 8 bits for inter-packet delay,
; total 1017 bits plus the data bits.
        lea     eax, [ecx+edx+1017]
; Start-Split phase:
; 77 bits for SPLIT packet, 88 bits for inter-packet delay,
; 68 bits for Token packet, 88 bits for another inter-packet delay,
; total 321 bits.
        mov     [edi+usb_split_info.ssplit_bandwidth], 321
        mov     [edi+usb_split_info.csplit_bandwidth], eax
        ret
.interrupt_out:
; Start-Split phase carries the data:
; 77 bits for SPLIT packet (32 for SYNC, 8 for EOP, 32 for data, 5 for bit stuffing),
; 88 bits for inter-packet delay, 68 bits for Token packet,
; 88 bits for inter-packet delay, 40 bits for SYNC+EOP in Data packet,
; 88 bits for last inter-packet delay, total 449 bits plus the data bits.
        lea     eax, [ecx+edx+449]
        mov     [edi+usb_split_info.ssplit_bandwidth], eax
; Complete-Split phase:
; 77 bits for SPLIT packet,
; 88 bits for inter-packet delay, 68 bits for Token packet,
; 736 bits for bus turn-around, 49 bits for Handshake packet,
; 8 bits for inter-packet delay, total 1026 bits.
        mov     [edi+usb_split_info.csplit_bandwidth], 1026
        ret
.error:
; Data payload exceeds the limit for this speed: report failure.
        xor     eax, eax
        ret
endp