Subversion Repositories Kolibri OS

Rev

Rev 4418 | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. ; Implementation of periodic transaction scheduler for USB.
  2. ; Bandwidth dedicated to periodic transactions is limited, so
  3. ; different pipes should be scheduled as uniformly as possible.
  4.  
  5. ; USB2 scheduler.
  6. ; There are two parts: high-speed pipes and split-transaction pipes.
  7. ;
  8. ; High-speed scheduler uses the same algorithm as USB1 scheduler:
  9. ; when adding a pipe, optimize the following quantity:
  10. ;  * for every microframe, take all bandwidth scheduled to periodic transfers,
  11. ;  * calculate maximum over all microframes,
  12. ;  * select a variant which minimizes that maximum;
  13. ;  * if there are several such variants,
  14. ;    prefer those that are closer to end of frame
  15. ;    to minimize collisions with split transactions;
  16. ; when removing a pipe, do nothing (except for bookkeeping).
  17. ; in: esi -> usb_controller
  18. ; out: edx -> usb_static_ep, eax = S-Mask
  19. proc ehci_select_hs_interrupt_list
  20. ; inherit some variables from usb_open_pipe
  21. virtual at ebp-12
  22. .targetsmask    dd      ?       ; best microframe index (steps 4-8) or best S-Mask (steps 10-16)
  23. .bandwidth      dd      ?       ; smallest maximal bandwidth found so far
  24. .target         dd      ?       ; list in the first group for the best variant (steps 4-6)
  25.                 dd      ?
  26.                 dd      ?
  27. .config_pipe    dd      ?
  28. .endpoint       dd      ?
  29. .maxpacket      dd      ?
  30. .type           dd      ?
  31. .interval       dd      ?
  32. end virtual
  33. ; prolog, initialize local vars to -1 (unsigned maximum, so the first variant always wins)
  34.         or      [.bandwidth], -1
  35.         or      [.target], -1
  36.         or      [.targetsmask], -1
  37.         push    ebx edi         ; save used registers to be stdcall
  38. ; 1. In EHCI, every list describes one millisecond = 8 microframes.
  39. ; Thus, there are two significantly different branches:
  40. ; for pipes with interval >= 8 microframes, advance to 2,
  41. ; for pipes which should be planned in every frame (one or more microframes),
  42. ; go to 9.
  43. ; Note: the actual interval for high-speed devices is 2^([.interval]-1),
  44. ; (the core specification forbids [.interval] == 0)
  45.         mov     ecx, [.interval]
  46.         dec     ecx             ; ecx = log2 of the interval in microframes
  47.         cmp     ecx, 3
  48.         jb      .every_frame    ; interval is 1, 2 or 4 microframes
  49. ; 2. Determine the actual interval in milliseconds.
  50.         sub     ecx, 3          ; ecx = log2 of the interval in milliseconds
  51.         cmp     ecx, 5  ; maximum 32ms
  52.         jbe     @f
  53.         movi    ecx, 5
  54. @@:
  55. ; There are four nested loops,
  56. ; * Loop #4 (the innermost one) calculates the total periodic bandwidth
  57. ;   scheduled in the given microframe of the given millisecond.
  58. ; * Loop #3 calculates the maximum over all milliseconds
  59. ;   in the given variant, that is the quantity we're trying to minimize.
  60. ; * Loops #1 and #2 check all variants;
  61. ;   loop #1 is responsible for the target millisecond,
  62. ;   loop #2 is responsible for the microframe within millisecond.
  63. ; 3. Prepare for loops.
  64. ; ebx = number of iterations of loop #1
  65. ; [esp] = delta of counter for loop #3, in bytes
  66. ; [esp+4] = delta between the first group and the target group, in bytes
  67.         movi    ebx, 1
  68.         movi    edx, sizeof.ehci_static_ep
  69.         shl     ebx, cl         ; ebx = interval in milliseconds
  70.         shl     edx, cl         ; edx = interval in milliseconds * sizeof.ehci_static_ep
  71.         mov     eax, 64*sizeof.ehci_static_ep
  72.         sub     eax, edx
  73.         sub     eax, edx        ; eax = (64 - 2*interval) * sizeof.ehci_static_ep
  74.         push    eax             ; becomes [.target_delta]
  75.         push    edx             ; becomes [.loop3_delta]
  76. ; 4. Select the best variant.
  77. ; 4a. Loop #1: initialize counter = pointer to ehci_static_ep for
  78. ; the target millisecond in the first group.
  79.         lea     edx, [esi+ehci_controller.IntEDs-sizeof.ehci_controller]
  80. .varloop0:
  81. ; 4b. Loop #2: initialize counter = microframe within the target millisecond.
  82.         xor     ecx, ecx
  83. .varloop:
  84. ; 4c. Loop #3: save counter of loop #1,
  85. ; initialize counter with the value of loop #1 counter,
  86. ; initialize maximal bandwidth = zero.
  87.         xor     edi, edi
  88.         push    edx
  89. virtual at esp
  90. .saved_counter1         dd      ?       ; step 4c
  91. .loop3_delta            dd      ?       ; step 3
  92. .target_delta           dd      ?       ; step 3
  93. end virtual
  94. .calc_max_bandwidth:
  95. ; 4d. Loop #4: initialize counter with the value of loop #3 counter,
  96. ; initialize total bandwidth = zero.
  97.         xor     eax, eax
  98.         push    edx
  99. .calc_bandwidth:
  100. ; 4e. Loop #4: add the bandwidth from the current list
  101. ; and advance to the next list, while there is one.
  102.         add     ax, [edx+ehci_static_ep.Bandwidths+ecx*2]
  103.         mov     edx, [edx+ehci_static_ep.NextList]
  104.         test    edx, edx
  105.         jnz     .calc_bandwidth
  106. ; 4f. Loop #4 end: restore counter of loop #3.
  107.         pop     edx             ; the stack is balanced again, so [.loop3_delta] below is valid
  108. ; 4g. Loop #3: update maximal bandwidth.
  109.         cmp     eax, edi
  110.         jb      @f
  111.         mov     edi, eax
  112. @@:
  113. ; 4h. Loop #3: advance the counter and repeat while within the first group.
  114.         lea     eax, [esi+ehci_controller.IntEDs+32*sizeof.ehci_static_ep-sizeof.ehci_controller]
  115.         add     edx, [.loop3_delta]
  116.         cmp     edx, eax
  117.         jb      .calc_max_bandwidth
  118. ; 4i. Loop #3 end: restore counter of loop #1.
  119.         pop     edx
  120. ; 4j. Loop #2: if the current variant is better (maybe not strictly)
  121. ; than the previous optimum, update the optimal bandwidth and the target.
  122.         cmp     edi, [.bandwidth]
  123.         ja      @f              ; larger maximum: keep the previous optimum
  124.         jb      .update         ; strictly smaller maximum: take this variant
  125.         cmp     ecx, [.targetsmask]     ; tie: [.targetsmask] holds a microframe index here
  126.         jb      @f              ; keep the previous optimum if it uses a later microframe
  127. .update:
  128.         mov     [.bandwidth], edi
  129.         mov     [.target], edx
  130.         mov     [.targetsmask], ecx
  131. @@:
  132. ; 4k. Loop #2: continue 8 times for every microframe.
  133.         inc     ecx
  134.         cmp     ecx, 8
  135.         jb      .varloop
  136. ; 4l. Loop #1: advance counter and repeat ebx times,
  137. ; ebx was calculated in step 3.
  138.         add     edx, sizeof.ehci_static_ep
  139.         dec     ebx
  140.         jnz     .varloop0
  141. ; 5. Calculate bandwidth for the new pipe.
  142.         mov     eax, [.maxpacket]
  143.         call    calc_hs_bandwidth       ; overwrites ecx and edx
  144.         mov     ecx, [.maxpacket]
  145.         shr     ecx, 11
  146.         inc     ecx
  147.         and     ecx, 3          ; ecx = (([.maxpacket] shr 11) + 1) and 3
  148.         imul    eax, ecx        ; scale the bandwidth of one transaction by that factor
  149. ; 6. Get the pointer to the best list.
  150.         pop     edx             ; discard the loop #3 delta pushed in step 3
  151.         pop     edx             ; get the target delta calculated in step 3
  152.         add     edx, [.target]
  153. ; 7. Check that bandwidth for the new pipe plus old bandwidth
  154. ; still fits to maximum allowed by the core specification
  155. ; current [.bandwidth] + new bandwidth <= limit;
  156. ; USB2 specification allows maximum 60000*80% bit times for periodic microframe
  157.         mov     ecx, [.bandwidth]
  158.         add     ecx, eax
  159.         cmp     ecx, 48000
  160.         ja      .no_bandwidth
  161. ; 8. Convert ehci_static_ep to usb_static_ep, update bandwidth and return.
  162.         mov     ecx, [.targetsmask]
  163.         add     [edx+ehci_static_ep.Bandwidths+ecx*2], ax
  164.         add     edx, ehci_static_ep.SoftwarePart
  165.         movi    eax, 1
  166.         shl     eax, cl         ; eax = S-Mask with the single bit of the chosen microframe
  167.         pop     edi ebx         ; restore used registers to be stdcall
  168.         ret
  169. .no_bandwidth:
  170.         dbgstr 'Periodic bandwidth limit reached'
  171.         xor     eax, eax        ; failure: return eax = 0 and edx = 0
  172.         xor     edx, edx
  173.         pop     edi ebx
  174.         ret
  175. .every_frame:
  176. ; The pipe should be scheduled every frame in two or more microframes.
  177. ; 9. Calculate maximal bandwidth for every microframe: three nested loops.
  178. ; 9a. The outermost loop: ebx = microframe to calculate.
  179.         xor     ebx, ebx
  180. .calc_all_bandwidths:
  181. ; 9b. The intermediate loop:
  182. ; edx = pointer to ehci_static_ep in the first group, [esp] = counter,
  183. ; edi = maximal bandwidth
  184.         lea     edx, [esi+ehci_controller.IntEDs-sizeof.ehci_controller]
  185.         xor     edi, edi
  186.         push    32
  187. .calc_max_bandwidth2:
  188. ; 9c. The innermost loop: calculate bandwidth for the given microframe
  189. ; in the given frame.
  190.         xor     eax, eax
  191.         push    edx
  192. .calc_bandwidth2:
  193.         add     ax, [edx+ehci_static_ep.Bandwidths+ebx*2]
  194.         mov     edx, [edx+ehci_static_ep.NextList]
  195.         test    edx, edx
  196.         jnz     .calc_bandwidth2
  197.         pop     edx
  198. ; 9d. The intermediate loop continued: update maximal bandwidth.
  199.         cmp     eax, edi
  200.         jb      @f
  201.         mov     edi, eax
  202. @@:
  203.         add     edx, sizeof.ehci_static_ep
  204.         dec     dword [esp]
  205.         jnz     .calc_max_bandwidth2
  206.         pop     eax             ; drop the exhausted counter
  207. ; 9e. Push the calculated maximal bandwidth and continue the outermost loop.
  208.         push    edi
  209.         inc     ebx
  210.         cmp     ebx, 8
  211.         jb      .calc_all_bandwidths
  212. virtual at esp
  213. .bandwidth7     dd      ?
  214. .bandwidth6     dd      ?
  215. .bandwidth5     dd      ?
  216. .bandwidth4     dd      ?
  217. .bandwidth3     dd      ?
  218. .bandwidth2     dd      ?
  219. .bandwidth1     dd      ?
  220. .bandwidth0     dd      ?
  221. end virtual
  222. ; 10. Select the best variant.
  223. ; edx = S-Mask = bitmask of scheduled microframes
  224.         movi    edx, 0x11       ; used when ecx = 2: every fourth microframe
  225.         cmp     ecx, 1          ; ecx is still [.interval]-1 from step 1
  226.         ja      @f
  227.         mov     dl, 0x55        ; used when ecx = 1: every second microframe
  228.         jz      @f
  229.         mov     dl, 0xFF        ; used when ecx = 0: every microframe
  230. @@:
  231. ; try all variants edx, edx shl 1, edx shl 2, ...
  232. ; while they fit in the lower byte (8 microframes per frame)
  233. .select_best_mframe:
  234.         xor     edi, edi
  235.         mov     ecx, edx
  236.         mov     eax, esp        ; eax -> [.bandwidth7], the entry matching mask bit 7
  237. .calc_mframe:
  238.         add     cl, cl          ; CF = next mask bit, highest bit first
  239.         jnc     @f
  240.         cmp     edi, [eax]
  241.         jae     @f
  242.         mov     edi, [eax]      ; edi = maximum over the microframes selected by the mask
  243. @@:
  244.         add     eax, 4
  245.         test    cl, cl
  246.         jnz     .calc_mframe
  247.         cmp     [.bandwidth], edi
  248.         jb      @f              ; on a tie the later (more shifted) mask replaces the earlier one
  249.         mov     [.bandwidth], edi
  250.         mov     [.targetsmask], edx
  251. @@:
  252.         add     dl, dl
  253.         jnc     .select_best_mframe
  254. ; 11. Restore stack after step 9.
  255.         add     esp, 8*4
  256. ; 12. Get the pointer to the target list (responsible for every microframe).
  257.         lea     edx, [esi+ehci_controller.IntEDs.SoftwarePart+62*sizeof.ehci_static_ep-sizeof.ehci_controller]
  258. ; 13. Calculate bandwidth on the bus.
  259.         mov     eax, [.maxpacket]
  260.         call    calc_hs_bandwidth
  261.         mov     ecx, [.maxpacket]
  262.         shr     ecx, 11
  263.         inc     ecx
  264.         and     ecx, 3          ; same scaling factor as in step 5
  265.         imul    eax, ecx
  266. ; 14. Check that current [.bandwidth] + new bandwidth <= limit;
  267. ; USB2 specification allows maximum 60000*80% bit times for periodic microframe.
  268.         mov     ecx, [.bandwidth]
  269.         add     ecx, eax
  270.         cmp     ecx, 48000
  271.         ja      .no_bandwidth
  272. ; 15. Update bandwidths including the new pipe.
  273.         mov     ecx, [.targetsmask]
  274.         lea     edi, [edx+ehci_static_ep.Bandwidths-ehci_static_ep.SoftwarePart]
  275. .update_bandwidths:
  276.         shr     ecx, 1          ; CF = next S-Mask bit, lowest bit first
  277.         jnc     @f
  278.         add     [edi], ax
  279. @@:
  280.         add     edi, 2
  281.         test    ecx, ecx
  282.         jnz     .update_bandwidths
  283. ; 16. Return target list and target S-Mask.
  284.         mov     eax, [.targetsmask]
  285.         pop     edi ebx         ; restore used registers to be stdcall
  286.         ret
  287. endp
  288.  
  289. ; Pipe is being removed, update the corresponding lists.
  290. ; We do not reorder anything, so just update book-keeping variable
  291. ; in the list header.
  292. proc ehci_hs_interrupt_list_unlink
  293.         movzx   eax, word [ebx+ehci_pipe.Token-sizeof.ehci_pipe+2]     ; upper word of Token; ebx -> usb_pipe
  294. ; calculate bandwidth of one transaction (only the low 11 bits of eax are used)
  295.         call    calc_hs_bandwidth
  296.         mov     ecx, [ebx+ehci_pipe.Flags-sizeof.ehci_pipe]
  297.         shr     ecx, 30         ; ecx = two highest bits of Flags
  298.         imul    eax, ecx        ; eax = bandwidth that was charged per scheduled microframe
  299.         movzx   ecx, byte [ebx+ehci_pipe.Flags-sizeof.ehci_pipe]       ; ecx = lowest byte of Flags, one bit per microframe
  300. ; get target list
  301.         mov     edx, [ebx+usb_pipe.BaseList]
  302. ; update bandwidth: subtract eax from every Bandwidths entry whose bit is set in ecx
  303. .dec_bandwidth:
  304.         shr     ecx, 1
  305.         jnc     @f
  306.         sub     word [edx+ehci_static_ep.Bandwidths - ehci_static_ep.SoftwarePart], ax
  307. @@:
  308.         add     edx, 2          ; advance to the entry of the next microframe
  309.         test    ecx, ecx
  310.         jnz     .dec_bandwidth
  311. ; return
  312.         ret
  313. endp
  314.  
  315. ; Helper procedure for USB2 scheduler: calculate bandwidth on the bus.
  316. ; in: low 11 bits of eax = payload size in bytes
  317. ; out: eax = maximal bandwidth in HS-bits
  318. proc calc_hs_bandwidth
  319.         and     eax, 7FFh       ; keep the 11-bit payload size of one transaction
  320.         add     eax, 3  ; data packet also carries PID and CRC16: 3 more bytes
  321. ; Bytes -> bits is a factor of 8, bit stuffing adds a factor of 7/6;
  322. ; together 28/3 = 9+1/3, computed as eax*9 plus the high half of eax*55555556h.
  323.         lea     ecx, [eax+eax*8]
  324.         mov     edx, 55555556h
  325.         mul     edx
  326. ; Fixed overhead of 989 bits: 68 bits of Token packet (32 SYNC, 24 token+address,
  327. ; 4 spare bits for bit stuffing in token+address, 8 EOP),
  328. ; 736 bits of bus turn-around, 40 bits of SYNC+EOP in the Data packet,
  329. ; 8 bits of inter-packet delay, 49 bits of Handshake packet,
  330. ; 88 bits of the second inter-packet delay.
  331.         lea     eax, [edx+ecx+989]
  332.         ret
  333. endp
  334.  
  335. ; Split-transaction scheduler (aka TT scheduler, TT stands for Transaction
  336. ; Translator, section 11.14 of the core spec) needs to schedule three event
  337. ; types on two buses: Start-Split and Complete-Split on HS bus and normal
  338. ; transaction on FS/LS bus.
  339. ; Assume that FS/LS bus is more restricted and more important to be scheduled
  340. ; uniformly, so select the variant which minimizes maximal used bandwidth
  341. ; on FS/LS bus and does not overflow HS bus.
  342. ; If there are several such variants, prefer the variant which is closest to
  343. ; start of frame, and within the same microframe consider HS bandwidth
  344. ; utilization as the last criterion.
  345.  
  346. ; The procedure ehci_select_tt_interrupt_list has been split into several
  347. ; macros, each representing a logical step of the procedure,
  348. ; to simplify understanding what is going on. Consider all the following macros
  349. ; as logical parts of one procedure, they are meaningless outside the context.
  350.  
  351. ; Given a frame, calculate bandwidth occupied by already opened pipes
  352. ; in every microframe.
  353. ; Look for both HS and FS/LS buses: there are 16 words of information,
  354. ; 8 for HS bus, 8 for FS/LS bus, for every microframe.
  355. ; Since we count already opened pipes, the total bandwidth in every microframe
  356. ; is less than 60000 bits (and even 60000*80% bits), otherwise the scheduler
  357. ; would not allow to open those pipes.
  358. ; edi -> first list for the frame
  359. macro tt_calc_bandwidth_in_frame
  360. {
  361. local .lists, .pipes, .pipes_done, .carry
  362. ; 1. Zero everything: [.budget], [.hs_bandwidth] (8 words each) and [.total_budget].
  363.         xor     eax, eax
  364.         mov     edx, edi        ; edx = current list, starting with the first list for the frame
  365. repeat 4
  366.         mov     dword [.budget+(%-1)*4], eax
  367. end repeat
  368. repeat 4
  369.         mov     dword [.hs_bandwidth+(%-1)*4], eax
  370. end repeat
  371.         mov     [.total_budget], ax
  372. ; Loop over all lists for the given frame.
  373. .lists:
  374. ; 2. Total HS bandwidth for all pipes in one list is kept inside list header,
  375. ; add it. Note that overflow is impossible, so we may add entire dwords.
  376.         mov     ebx, [edx+ehci_static_ep.SoftwarePart+usb_static_ep.NextVirt]  ; ebx = first pipe of the list
  377. repeat 4
  378.         mov     eax, dword [edx+ehci_static_ep.Bandwidths+(%-1)*4]
  379.         add     dword [.hs_bandwidth+(%-1)*4], eax
  380. end repeat
  381. ; Loop over all pipes in the given list.
  382.         add     edx, ehci_static_ep.SoftwarePart        ; edx -> SoftwarePart; the walk ends when ebx reaches it
  383. .pipes:
  384.         cmp     ebx, edx
  385.         jz      .pipes_done
  386. ; 3. For every pipe in every list for the given frame:
  387. ; 3a. Check whether the pipe resides on the same FS/LS bus as the new pipe.
  388. ; If not, skip this pipe.
  389.         mov     eax, [ebx+usb_pipe.DeviceData]
  390.         mov     eax, [eax+usb_device_data.TTHub]
  391.         cmp     eax, [.tthub]
  392.         jnz     @f
  393. ; 3b. Calculate FS/LS budget for the opened pipe.
  394. ; Note that eax = TTHub after 3a.
  395. ; NOTE(review): ebx and edx are used after the call, so tt_calc_budget must preserve them - confirm.
  396.         call    tt_calc_budget
  397. ; 3c. Update total budget: add the value from 3b
  398. ; to the budget of the first microframe scheduled for this pipe.
  399.         bsf     ecx, [ebx+ehci_pipe.Flags-sizeof.ehci_pipe]    ; ecx = index of the lowest set bit of Flags
  400.         add     [.budget+ecx*2], ax
  401. @@:
  402.         mov     ebx, [ebx+usb_pipe.NextVirt]
  403.         jmp     .pipes
  404. .pipes_done:
  405.         mov     edx, [edx+ehci_static_ep.NextList-ehci_static_ep.SoftwarePart]
  406.         test    edx, edx
  407.         jnz     .lists
  408. ; 4. If the budget for some microframe is exceeded, carry it to the following
  409. ; microframe(s). The actual size of one microframe is 187.5 raw bytes;
  410. ; the core spec says that 188 bytes should be scheduled in every microframe.
  411.         xor     eax, eax        ; ax = carry from the previous microframe
  412.         xor     ecx, ecx        ; ecx = microframe index
  413. .carry:
  414.         xor     edx, edx        ; dx = carry to the next microframe, zero unless the limit is exceeded
  415.         add     ax, [.budget+ecx*2]
  416.         cmp     ax, 188
  417.         jbe     @f
  418.         mov     dx, ax
  419.         mov     ax, 188         ; clamp this microframe to 188 bytes
  420.         sub     dx, ax          ; dx = the excess over 188 bytes
  421. @@:
  422.         mov     [.budget+ecx*2], ax
  423.         add     [.total_budget], ax
  424.         mov     ax, dx
  425.         inc     ecx
  426.         cmp     ecx, 8
  427.         jb      .carry
  428. }
  428.  
  429. ; Checks whether the new pipe fits in the existing FS budget
  430. ; starting from the given microframe. If not, mark the microframe
  431. ; as impossible for scheduling.
  432. ; in: ecx = microframe
  433. macro tt_exclude_microframe_if_no_budget
  434. {
  435. local .loop, .good, .bad
  436. ; 1. If the new budget plus the current budget does not exceed 188 bytes,
  437. ; the variant is possible.
  438.         mov     ax, [.budget+ecx*2]
  439.         mov     edx, ecx        ; edx = last microframe occupied by the new pipe so far
  440.         add     ax, [.new_budget]
  441.         sub     ax, 188         ; ax = number of bytes that do not fit in microframe ecx
  442.         jbe     .good
  443. ; 2. Otherwise,
  444. ; a) nothing should be scheduled in some following microframes,
  445. ; b) after adding the new budget everything should fit in first 6 microframes,
  446. ;    this guarantees that even in the worst case 90% limit is satisfied.
  447. .loop:
  448.         cmp     edx, 5
  449.         jae     .bad            ; the excess would spill past microframe 5
  450.         cmp     [.budget+(edx+1)*2], 0
  451.         jnz     .bad            ; the next microframe is already in use
  452.         inc     edx
  453.         sub     ax, 188         ; the next (empty) microframe absorbs up to 188 bytes
  454.         ja      .loop           ; some bytes still do not fit, try one more microframe
       ; The whole excess has been placed in empty microframes: the variant is
       ; possible, so do not fall through into .bad.
               jmp     .good
  455. .bad:
  456.         btr     [.possible_microframes], ecx    ; exclude microframe ecx from scheduling
  457. .good:
  458. }
  459.  
  460. ; Calculate data corresponding to the particular scheduling variant for the new pipe.
  461. ; Data describe the current scheduling state collected over all frames touched
  462. ; by the given variant: maximal HS bandwidth, maximal FS/LS budget,
  463. ; which microframes fit in the current FS/LS budget for all frames.
  464. macro tt_calc_statistics_for_one_variant
  465. {
  466. local .frames, .microframes
  467. ; 1. Initialize: zero maximal bandwidth,
  468. ; first 6 microframes are possible for scheduling.
  469.         xor     eax, eax
  470. repeat 4
  471.         mov     dword [.max_hs_bandwidth+(%-1)*4], eax
  472. end repeat
  473.         mov     [.max_fs_bandwidth], ax
  474.         mov     [.possible_microframes], 0x3F  ; bits 0..5 set
  475. ; Loop over all frames starting with [.variant] advancing by [.variant_delta].
  476.         mov     edi, [.variant]
  477. .frames:
  478. ; 2. Calculate statistics for one frame:
  479. ; fills [.budget], [.hs_bandwidth] and [.total_budget] for the frame at edi.
  480.         tt_calc_bandwidth_in_frame
  481. ; 3. Update maximal FS budget.
  482.         mov     ax, [.total_budget]
  483.         cmp     ax, [.max_fs_bandwidth]
  484.         jb      @f
  485.         mov     [.max_fs_bandwidth], ax
  486. @@:
  487. ; 4. For every microframe, update maximal HS bandwidth
  488. ; and check whether the microframe is allowed for scheduling.
  489.         xor     ecx, ecx
  490. .microframes:
  491.         mov     ax, [.hs_bandwidth+ecx*2]
  492.         cmp     ax, [.max_hs_bandwidth+ecx*2]
  493.         jb      @f
  494.         mov     [.max_hs_bandwidth+ecx*2], ax
  495. @@:
  496.         tt_exclude_microframe_if_no_budget      ; may clear bit ecx of [.possible_microframes]
  497.         inc     ecx
  498.         cmp     ecx, 8
  499.         jb      .microframes
  500. ; Stop loop when outside of first descriptor group.
  501.         lea     eax, [esi+ehci_controller.IntEDs+32*sizeof.ehci_static_ep-sizeof.ehci_controller]
  502.         add     edi, [.variant_delta]
  503.         cmp     edi, eax
  504.         jb      .frames
  505. }
  505.  
  506. struct usb_split_info
  507. microframe_mask         dd      ?       ; lower byte is S-mask, second byte is C-mask
  508. ssplit_bandwidth        dd      ?       ; HS bandwidth added to every microframe selected by S-mask
  509. csplit_bandwidth        dd      ?       ; HS bandwidth added to every microframe selected by C-mask
  510. ends
  511.  
  512. ; Check whether the current variant and the current microframe are allowed
  513. ; for scheduling. If so, check whether they are better than the previously
  514. ; selected variant+microframe, if any. If so, update the previously selected
  515. ; variant+microframe to current ones.
  516. ; ecx = microframe, [.variant] = variant
  517. macro tt_check_variant_microframe
  518. {
  519. local .nothing, .update, .ssplit, .csplit, .csplit_done
  520. ; 1. If the current microframe does not fit in existing FS budget, do nothing.
  521.         bt      [.possible_microframes], ecx
  522.         jnc     .nothing
  523. ; 2. Calculate maximal HS bandwidth over all affected microframes.
  524. ; 2a. Start-split phase: one or more microframes starting with ecx,
  525. ; coded in lower byte of .info.microframe_mask.
  526.         xor     ebx, ebx        ; ebx = maximal HS bandwidth including the new pipe
  527.         xor     edx, edx        ; edx = bit index inside .info.microframe_mask
  528. .ssplit:
  529.         lea     eax, [ecx+edx]
  530.         movzx   eax, [.max_hs_bandwidth+eax*2]
  531.         add     eax, [.info.ssplit_bandwidth]
  532.         cmp     ebx, eax
  533.         ja      @f
  534.         mov     ebx, eax
  535. @@:
  536.         inc     edx
  537.         bt      [.info.microframe_mask], edx
  538.         jc      .ssplit
  539. ; 2b. Complete-split phase: zero or more microframes starting with
  540. ; ecx+(last start-split microframe)+2,
  541. ; coded in second byte of .info.microframe_mask.
  542.         add     edx, 8          ; continue with the bits of the second byte
  543. .csplit:
  544.         inc     edx
  545.         bt      [.info.microframe_mask], edx
  546.         jnc     .csplit_done
  547.         lea     eax, [ecx+edx]  ; eax = 8 + (absolute microframe of this complete-split)
  548.         cmp     eax, 16         ; stop when the microframe eax-8 is outside the frame (edx >= 9 here, so a bound of 8 would always stop)
  549.         jae     .csplit_done
  550.         movzx   eax, [.max_hs_bandwidth+(eax-8)*2]
  551.         add     eax, [.info.csplit_bandwidth]
  552.         cmp     ebx, eax
  553.         ja      .csplit
  554.         mov     ebx, eax
  555.         jmp     .csplit
  556. .csplit_done:
  557. ; 3. Check that current HS bandwidth + new bandwidth <= limit;
  558. ; USB2 specification allows maximum 60000*80% bit times for periodic microframe.
  559.         cmp     ebx, 48000
  560.         ja      .nothing
  561. ; 4. This variant is possible for scheduling.
  562. ; Check whether it is better than the currently selected one.
  563. ; 4a. The primary criterion: FS/LS bandwidth.
  564.         mov     ax, [.max_fs_bandwidth]
  565.         cmp     ax, [.best_fs_bandwidth]
  566.         ja      .nothing
  567.         jb      .update
  568. ; 4b. The secondary criterion: prefer microframes which are closer to start of frame.
  569.         cmp     ecx, [.targetsmask]
  570.         ja      .nothing
  571.         jb      .update
  572. ; 4c. The last criterion: HS bandwidth.
  573.         cmp     ebx, [.bandwidth]
  574.         ja      .nothing
  575. .update:
  576. ; 5. This variant is better than the previously selected.
  577. ; Update the best variant with current data (ax still holds [.max_fs_bandwidth]).
  578.         mov     [.best_fs_bandwidth], ax
  579.         mov     [.bandwidth], ebx
  580.         mov     [.targetsmask], ecx
  581.         mov     eax, [.variant]
  582.         mov     [.target], eax
  583. .nothing:
  584. }
  585.  
  586. ; TT scheduler: add new pipe.
  587. ; in: esi -> usb_controller, edi -> usb_pipe
  588. ; out: edx -> usb_static_ep, eax = S-Mask
  589. proc ehci_select_tt_interrupt_list
  590. virtual at ebp-12-.local_vars_size
  591. .local_vars_start:
  592. .info                   usb_split_info
  593. .new_budget             dw      ?       ; FS budget of the new pipe (step 2d)
  594. .total_budget           dw      ?       ; sum of per-microframe budgets in the current frame
  595. .possible_microframes   dd      ?       ; bit N set = microframe N is still allowed
  596. .tthub                  dd      ?       ; TTHub of the new pipe (step 2d)
  597. .budget                 rw      8       ; FS/LS budget per microframe in the current frame
  598. .hs_bandwidth           rw      8       ; HS bandwidth per microframe in the current frame
  599. .max_hs_bandwidth       rw      8       ; maximum of .hs_bandwidth over frames of the variant
  600. .max_fs_bandwidth       dw      ?       ; maximum of .total_budget over frames of the variant
  601. .best_fs_bandwidth      dw      ?       ; .max_fs_bandwidth of the best variant so far
  602. .variant                dd      ?       ; first list of the current variant (step 2c)
  603. .variant_delta          dd      ?       ; step 2a
  604. .target_delta           dd      ?       ; step 2b
  605. .local_vars_size = $ - .local_vars_start
  606. if .local_vars_size > 24*4
  607. err Modify stack frame size in
  608. end if
  609.  
  610. .targetsmask    dd      ?
  611. .bandwidth      dd      ?
  612. .target         dd      ?
  613.                 dd      ?
  614.                 dd      ?
  615. .config_pipe    dd      ?
  616. .endpoint       dd      ?
  617. .maxpacket      dd      ?
  618. .type           dd      ?
  619. .interval       dd      ?
  620. end virtual
  621.         mov     eax, [edi+ehci_pipe.Token-sizeof.ehci_pipe]    ; NOTE(review): eax computed by these three
  622.         shr     eax, 16                                         ; instructions is overwritten in step 1
  623.         and     eax, (1 shl 11) - 1                             ; before anything reads it
  624.         push    ebx edi
  625. ; 1. Compute the real interval. FS/LS devices encode the interval as
  626. ; number of milliseconds. Use the maximal power of two that is not greater than
  627. ; the given interval and EHCI scheduling area = 32 frames.
  628.         cmp     [.interval], 1
  629.         adc     [.interval], 0  ; treat an interval of 0 as 1
  630.         mov     ecx, 64
  631.         mov     eax, 64 * sizeof.ehci_static_ep
  632. @@:
  633.         shr     ecx, 1          ; try 32, 16, 8, ... until ecx <= [.interval]
  634.         cmp     [.interval], ecx
  635.         jb      @b
  636.         mov     [.interval], ecx
  637. ; 2. Compute variables for further calculations.
  638. ; 2a. [.variant_delta] is delta between two lists from the first group
  639. ; that correspond to the same variant.
  640.         imul    ecx, sizeof.ehci_static_ep
  641.         mov     [.variant_delta], ecx
  642. ; 2b. [.target_delta] is delta between the final answer from the group
  643. ; corresponding to [.interval] and the item from the first group.
  644.         sub     eax, ecx
  645.         sub     eax, ecx
  646.         mov     [.target_delta], eax
  647. ; 2c. [.variant] is the first list from the first group that corresponds
  648. ; to the current variant.
  649.         lea     eax, [esi+ehci_controller.IntEDs-sizeof.ehci_controller]
  650.         mov     [.variant], eax
  651. ; 2d. [.tthub] identifies TT hub for new pipe, [.new_budget] is FS budget
  652. ; for new pipe.
  653.         mov     eax, [edi+usb_pipe.DeviceData]
  654.         mov     eax, [eax+usb_device_data.TTHub]
  655.         mov     ebx, edi        ; tt_calc_budget takes the pipe in ebx and the hub in eax
  656.         mov     [.tthub], eax
  657.         call    tt_calc_budget
  658.         mov     [.new_budget], ax
  659. ; 2e. [.usb_split_info] describes bandwidth used by new pipe on HS bus.
  660.         lea     edi, [.info]
  661.         call    tt_fill_split_info
  662.         test    eax, eax        ; eax = 0 is treated as failure
  663.         jz      .no_bandwidth
  664. ; 2f. There is no best variant yet, put maximal possible values,
  665. ; so any variant would be better than the "current".
  666.         or      [.best_fs_bandwidth], -1
  667.         or      [.target], -1
  668.         or      [.bandwidth], -1
  669.         or      [.targetsmask], -1
  670. ; 3. Loop over all variants, for every variant decide whether it is acceptable,
  671. ; select the best variant from all acceptable variants.
  672. .check_variants:
  673.         tt_calc_statistics_for_one_variant
  674.         xor     ecx, ecx
  675. .check_microframes:
  676.         tt_check_variant_microframe
  677.         inc     ecx
  678.         cmp     ecx, 6          ; only microframes 0..5 are candidates
  679.         jb      .check_microframes
  680.         add     [.variant], sizeof.ehci_static_ep
  681.         dec     [.interval]     ; [.interval] doubles as the counter of remaining variants
  682.         jnz     .check_variants
  683. ; 4. If there are no acceptable variants, return error.
  684.         mov     ecx, [.targetsmask]
  685.         mov     edx, [.target]
  686.         cmp     ecx, -1         ; still the initial value from step 2f: nothing was selected
  687.         jz      .no_bandwidth
  688. ; 5. Calculate the answer: edx -> selected list, eax = S-Mask and C-Mask.
  689.         mov     eax, [.info.microframe_mask]
  690.         add     edx, [.target_delta]
  691.         shl     eax, cl         ; move both masks to the selected microframe
  692.         and     eax, 0xFFFF     ; keep only the two mask bytes
  693. ; 6. Update HS bandwidths in the selected list.
  694.         xor     ecx, ecx
  695.         mov     ebx, [.info.ssplit_bandwidth]
  696. .update_ssplit:
  697.         bt      eax, ecx        ; bits 0..7: S-Mask
  698.         jnc     @f
  699.         add     [edx+ehci_static_ep.Bandwidths+ecx*2], bx
  700. @@:
  701.         inc     ecx
  702.         cmp     ecx, 8
  703.         jb      .update_ssplit
  704.         mov     ebx, [.info.csplit_bandwidth]
  705. .update_csplit:
  706.         bt      eax, ecx        ; bits 8..15: C-Mask
  707.         jnc     @f
  708.         add     [edx+ehci_static_ep.Bandwidths+(ecx-8)*2], bx
  709. @@:
  710.         inc     ecx
  711.         cmp     ecx, 16
  712.         jb      .update_csplit
  713. ; 7. Return.
  714.         add     edx, ehci_static_ep.SoftwarePart
  715.         pop     edi ebx
  716.         ret
  717. .no_bandwidth:
  718.         dbgstr 'Periodic bandwidth limit reached'
  719.         xor     eax, eax        ; failure: return eax = 0 and edx = 0
  720.         xor     edx, edx
  721.         pop     edi ebx
  722.         ret
  723. endp
  724.  
  725. ; Pipe is being removed, update the corresponding lists.
  726. ; We do not reorder anything, so just update book-keeping variable
  727. ; in the list header.
  728. proc ehci_fs_interrupt_list_unlink
  729. ; calculate bandwidth: build a temporary usb_split_info on the stack
  730.         push    edi
  731.         sub     esp, sizeof.usb_split_info
  732.         mov     edi, esp        ; edi -> usb_split_info to be filled
  733.         call    tt_fill_split_info
  734. ; get target list
  735.         mov     edx, [ebx+usb_pipe.BaseList]   ; ebx -> usb_pipe
  736. ; update bandwidth for Start-Split
  737.         mov     eax, [edi+usb_split_info.ssplit_bandwidth]
  738.         xor     ecx, ecx
  739. .dec_bandwidth_1:
  740.         bt      [ebx+ehci_pipe.Flags-sizeof.ehci_pipe], ecx    ; bits 0..7 of Flags
  741.         jnc     @f
  742.         sub     word [edx+ecx*2+ehci_static_ep.Bandwidths - ehci_static_ep.SoftwarePart], ax
  743. @@:
  744.         inc     ecx
  745.         cmp     ecx, 8
  746.         jb      .dec_bandwidth_1
  747. ; update bandwidth for Complete-Split
  748.         mov     eax, [edi+usb_split_info.csplit_bandwidth]
  749. .dec_bandwidth_2:
  750.         bt      [ebx+ehci_pipe.Flags-sizeof.ehci_pipe], ecx    ; bits 8..15 of Flags
  751.         jnc     @f
  752.         sub     word [edx+(ecx-8)*2+ehci_static_ep.Bandwidths - ehci_static_ep.SoftwarePart], ax
  753. @@:
  754.         inc     ecx
  755.         cmp     ecx, 16
  756.         jb      .dec_bandwidth_2
  757.         add     esp, sizeof.usb_split_info      ; free the temporary usb_split_info
  758.         pop     edi
  759.         ret
  760. endp
  761.  
  762. ; Helper procedure for ehci_select_tt_interrupt_list.
  763. ; Calculates the "best-case budget" according to the core spec,
  764. ; that is, the number of bytes (not bits) corresponding to the "optimistic" transaction
  765. ; time, including inter-packet delays/bus turn-around time,
  766. ; but without bit stuffing and timer drift.
  767. ; One extra TT-specific delay is added: TT think time from the hub descriptor.
  768. ; Similar to calc_usb1_bandwidth with corresponding changes.
  769. ; in: eax -> usb_hub with TT, ebx -> usb_pipe; out: eax = budget in FS-bytes, ecx = TT think time
  770. proc tt_calc_budget
  771.         invoke  usbhc_api.usb_get_tt_think_time ; ecx = TT think time in FS-bytes
  772.         mov     eax, [ebx+ehci_pipe.Token-sizeof.ehci_pipe]     ; eax = Token dword of the pipe
  773.         shr     eax, 16
  774.         and     eax, (1 shl 11) - 1     ; get data length: bits 16..26 of Token
  775.         bt      [ebx+ehci_pipe.Token-sizeof.ehci_pipe], 12      ; bit 12 of Token set -> low-speed branch
  776.         jc      .low_speed
  777. ; Full-speed interrupt IN/OUT:
  778. ; 33 bits for Token packet (8 for SYNC, 24 for token+address, 3 for EOP; NOTE(review): these three add up to 35 - confirm),
  779. ; 18 bits for bus turn-around, 11 bits for SYNC+EOP in Data packet,
  780. ; 2 bits for inter-packet delay, 19 bits for Handshake packet,
  781. ; 2 bits for another inter-packet delay. 85 bits total, pad to 11 bytes (87 bits would pad to 11 bytes as well).
  782.         lea     eax, [eax+11+ecx]       ; eax = data length + 11 + TT think time
  783. ; 1 byte is minimal TT think time in addition to ecx.
  784.         ret
  785. .low_speed:
  786. ; Low-speed interrupt IN/OUT:
  787. ; multiply by 8 for LS -> FS,
  788. ; add 85 bytes as in full-speed interrupt and extra 5 bytes for two PRE packets
  789. ; and two hub delays.
  790. ; 1 byte is minimal TT think time in addition to ecx.
  791.         lea     eax, [eax*8+90+ecx]     ; eax = data length*8 + 90 + TT think time
  792.         ret
  793. endp
  794.  
  795. ; Helper procedure for TT scheduler.
  796. ; Calculates Start-Split/Complete-Split masks and HS bandwidths (in bits) for an interrupt pipe.
  797. ; in: ebx -> usb_pipe, edi -> usb_split_info; out: structure filled and eax nonzero, or eax = 0 and nothing stored if the data length is over the limit; ecx, edx are overwritten
  798. proc tt_fill_split_info
  799. ; Interrupt endpoints.
  800. ; The core spec says in 5.7.3 "Interrupt Transfer Packet Size Constraints" that:
  801. ; The maximum allowable interrupt data payload size is 64 bytes or less for full-speed.
  802. ; Low-speed devices are limited to eight bytes or less maximum data payload size.
  803. ; This is important for scheduling: it guarantees that in any case the transaction fits
  804. ; in two microframes (usually one, two if the transaction has started too late in the first
  805. ; microframe), so check it.
  806.         mov     eax, [ebx+ehci_pipe.Token-sizeof.ehci_pipe]     ; eax = Token dword of the pipe
  807.         mov     ecx, 8          ; limit used when bit 12 of Token is set (low-speed)
  808.         bt      eax, 12
  809.         jc      @f
  810.         mov     ecx, 64         ; limit used otherwise (full-speed)
  811. @@:
  812.         shr     eax, 16
  813.         and     eax, (1 shl 11) - 1     ; get data length: bits 16..26 of Token
  814.         cmp     eax, ecx
  815.         ja      .error          ; over the limit: return eax = 0 without touching the structure
  816.         add     eax, 3  ; add 3 bytes for other fields in data packet, PID+CRC16
  817. ; Multiply by 8 for bytes -> bits and then by 7/6 to accommodate bit stuffing;
  818. ; total 28/3 = 9+1/3
  819.         mov     edx, 55555556h  ; (2^32+2)/3, so the high dword of the product below is eax/3
  820.         lea     ecx, [eax*9]    ; ecx = 9 * (data length + 3)
  821.         mul     edx             ; edx = (data length + 3)/3, integer part; eax is destroyed; edx+ecx is used as the data bit count below
  822. ; One start-split, three complete-splits (unless the last is too far,
  823. ; but this is handled by the caller).
  824.         mov     eax, [ebx+usb_pipe.LastTD]
  825.         mov     [edi+usb_split_info.microframe_mask], 0x1C01    ; bit 0 plus bits 10..12
  826. ; Structure and HS bandwidth of packets depend on the direction.
  827.         bt      [eax+ehci_gtd.Token-sizeof.ehci_gtd], 8 ; bit 8 of the last TD's Token set -> IN branch
  828.         jc      .interrupt_in
  829. .interrupt_out:
  830. ; Start-Split phase:
  831. ; 77 bits for SPLIT packet (32 for SYNC, 8 for EOP, 32 for data, 5 for bit stuffing),
  832. ; 88 bits for inter-packet delay, 68 bits for Token packet,
  833. ; 88 bits for inter-packet delay, 40 bits for SYNC+EOP in Data packet,
  834. ; 88 bits for last inter-packet delay, total 449 bits.
  835.         lea     eax, [edx+ecx+449]      ; data bits (edx+ecx) + fixed 449 bits; the data travels in the Start-Split for OUT
  836.         mov     [edi+usb_split_info.ssplit_bandwidth], eax
  837. ; Complete-Split phase:
  838. ; 77 bits for SPLIT packet,
  839. ; 88 bits for inter-packet delay, 68 bits for Token packet,
  840. ; 736 bits for bus turn-around, 49 bits for Handshake packet,
  841. ; 8 bits for inter-packet delay, total 1026 bits.
  842.         mov     [edi+usb_split_info.csplit_bandwidth], 1026
  843.         ret                     ; eax = ssplit_bandwidth here, hence nonzero
  844. .interrupt_in:
  845. ; Start-Split phase:
  846. ; 77 bits for SPLIT packet, 88 bits for inter-packet delay,
  847. ; 68 bits for Token packet, 88 bits for another inter-packet delay,
  848. ; total 321 bits.
  849.         mov     [edi+usb_split_info.ssplit_bandwidth], 321
  850. ; Complete-Split phase:
  851. ; 77 bits for SPLIT packet, 88 bits for inter-packet delay,
  852. ; 68 bits for Token packet, 736 bits for bus turn-around,
  853. ; 40 bits for SYNC+EOP in Data packet, 8 bits for inter-packet delay,
  854. ; total 1017 bits.
  855.         lea     eax, [edx+ecx+1017]     ; data bits (edx+ecx) + fixed 1017 bits; the data travels in the Complete-Split for IN
  856.         mov     [edi+usb_split_info.csplit_bandwidth], eax
  857.         ret                     ; eax = csplit_bandwidth here, hence nonzero
  858. .error:
  859.         xor     eax, eax        ; eax = 0 reports the failed size check
  860.         ret
  861. endp
  862.