Subversion Repositories Kolibri OS

Rev

Rev 9715 | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  2. ;;                                                              ;;
  3. ;; Copyright (C) KolibriOS team 2004-2022. All rights reserved. ;;
  4. ;; Distributed under terms of the GNU General Public License    ;;
  5. ;;                                                              ;;
  6. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  7.  
  8. $Revision: 9950 $
  9.  
; Initializes PAT (Page Attribute Table) and MTRRs.
; If the CPU supports PAT, reprograms IA32_PAT so that a page-attribute
; combination selectable from page tables gives write-combining memory;
; otherwise falls back to marking the LFB write-combined via a
; variable-range MTRR.
; Clobbers eax, ecx, edx and flags.
proc init_pat_mtrr
        cmp     [BOOT.mtrr], byte 2     ; BOOT.mtrr == 2: skip MTRR/PAT setup entirely
                                        ; (presumably a loader-provided opt-out - verify against boot code)
        je      .exit

        bt      [cpu_caps], CAPS_PAT    ; if PAT is not supported, use MTRR
        jnc     .use_mtrr

; Change PAT_MSR for write combining memory.
; NOTE(review): Intel SDM recommends the full cache-disable/WBINVD/TLB-flush
; sequence around IA32_PAT updates; this early-boot path writes the MSR
; directly - confirm no conflicting cached translations exist at this point.
        mov     ecx, MSR_CR_PAT
        mov     eax, PAT_VALUE          ; UC UCM WC WB
        mov     edx, eax                ; same four types replicated for PAT4..PAT7
        wrmsr
        ret

.use_mtrr:
        bt      [cpu_caps], CAPS_MTRR   ; no MTRR support either: nothing we can do
        jnc     .exit

        call    mtrr_reconfigure
        stdcall set_mtrr, [LFBAddress], 0x1000000, MEM_WC

.exit:
        ret
endp
  35.  
; Helper procedure for mtrr_reconfigure and set_mtrr,
; called before changes in MTRRs.
; 1. disable and flush caches
; 2. clear PGE bit in cr4
; 3. flush TLB
; 4. disable mtrr
; Must be paired with a later call to mtrr_end_change.
; Clobbers eax, ecx, edx and flags.

proc mtrr_begin_change
        mov     eax, cr0
        or      eax, 0x60000000 ; set CR0.CD and CR0.NW: disable caching
        mov     cr0, eax
        wbinvd                  ; invalidate and write back cache contents

; If the CPU supports global pages, toggling CR4.PGE flushes the whole TLB
; including global entries; otherwise fall back to a CR3 reload
; (which flushes only non-global entries).
        bt      [cpu_caps], CAPS_PGE
        jnc     .cr3_flush

        mov     eax, cr4
        btr     eax, 7          ; clear cr4.PGE (bit 7)
        mov     cr4, eax        ; flush TLB
        jmp     @F              ; skip extra serialization

.cr3_flush:
        mov     eax, cr3
        mov     cr3, eax        ; flush TLB
@@:
        mov     ecx, MSR_MTRR_DEF_TYPE
        rdmsr
        btr     eax, 11         ; clear E (MTRR enable) flag
        wrmsr                   ; disable mtrr
        ret
endp
  67.  
; Helper procedure for mtrr_reconfigure and set_mtrr,
; called after changes in MTRRs.
; 1. enable mtrr
; 2. flush all caches
; 3. flush TLB
; 4. restore cr4.PGE flag, if required
; Counterpart of mtrr_begin_change.
; Clobbers eax, ecx, edx and flags.

proc mtrr_end_change
        mov     ecx, MSR_MTRR_DEF_TYPE
        rdmsr
        or      ah, 8           ; set E flag (bit 11): enable variable-range MTRRs
        and     al, 0xF0        ; default memtype (bits 0-7) = UC
        wrmsr

        wbinvd                  ; again invalidate caches
        mov     eax, cr0
        and     eax, not 0x60000000     ; clear CR0.CD and CR0.NW
        mov     cr0, eax        ; enable caching

        mov     eax, cr3
        mov     cr3, eax        ; flush TLB

; Re-enable global pages whenever the CPU supports them
; (mtrr_begin_change cleared CR4.PGE under the same condition).
        bt      [cpu_caps], CAPS_PGE
        jnc     @F

        mov     eax, cr4
        bts     eax, 7          ; set cr4.PGE flag (bit 7)
        mov     cr4, eax
@@:
        ret
endp
  99.  
; Some limits to number of structures located in the stack.
; Both arrays live in mtrr_reconfigure's stack frame:
; MAX_USEFUL_MTRRS bounds the local MTRR table (16 bytes per entry),
; MAX_RANGES bounds the free list of mtrr_range items.
MAX_USEFUL_MTRRS = 16
MAX_RANGES = 16
  103.  
; mtrr_reconfigure keeps a list of MEM_WB ranges.
; This structure describes one item in the list.
struct mtrr_range
next            dd      ?       ; next item, 0 = end of list
start           dq      ?       ; first byte (64-bit physical address)
length          dq      ?       ; length in bytes
ends
  111.  
uglobal
align 4
; Number of variable-range MTRRs supported by the CPU,
; read from MTRRCAP[7:0] in mtrr_reconfigure; 0 until initialized.
num_variable_mtrrs      dd      0
endg
  116.  
  117. ; Helper procedure for MTRR initialization.
  118. ; Takes MTRR configured by BIOS and tries to recongifure them
  119. ; in order to allow non-UC data at top of 4G memory.
  120. ; Example: if low part of physical memory is 3.5G = 0xE0000000 bytes wide,
  121. ; BIOS can configure two MTRRs so that the first MTRR describes [0, 4G) as WB
  122. ; and the second MTRR describes [3.5G, 4G) as UC;
  123. ; WB+UC=UC, so the resulting memory map would be as needed,
  124. ; but in this configuration our attempts to map LFB at (say) 0xE8000000 as WC
  125. ; would be ignored, WB+UC+WC is still UC.
  126. ; So we must keep top of 4G memory not covered by MTRRs,
  127. ; using three WB MTRRs [0,2G) + [2G,3G) + [3G,3.5G),
  128. ; this gives the same memory map, but allows to add further entries.
  129. ; See mtrrtest.asm for detailed input/output from real hardware+BIOS.
  130. proc mtrr_reconfigure
  131.         push    ebp     ; we're called from init_LFB, and it feels hurt when ebp is destroyed
  132. ; 1. Prepare local variables.
  133. ; 1a. Create list of MAX_RANGES free (aka not yet allocated) ranges.
  134.         xor     eax, eax
  135.         lea     ecx, [eax + MAX_RANGES]
  136. .init_ranges:
  137.         sub     esp, sizeof.mtrr_range - 4
  138.         push    eax
  139.         mov     eax, esp
  140.         dec     ecx
  141.         jnz     .init_ranges
  142.         mov     eax, esp
  143. ; 1b. Fill individual local variables.
  144.         xor     edx, edx
  145.         sub     esp, MAX_USEFUL_MTRRS * 16      ; .mtrrs
  146.         push    edx             ; .mtrrs_end
  147.         push    edx             ; .num_used_mtrrs
  148.         push    eax             ; .first_free_range
  149.         push    edx             ; .first_range: no ranges yet
  150.         mov     cl, [cpu_phys_addr_width]
  151.         or      eax, -1
  152.         shl     eax, cl ; note: this uses cl&31 = cl-32, not the entire cl
  153.         push    eax     ; .phys_reserved_mask
  154. virtual at esp
  155. .phys_reserved_mask     dd      ?
  156. .first_range            dd      ?
  157. .first_free_range       dd      ?
  158. .num_used_mtrrs         dd      ?
  159. .mtrrs_end              dd      ?
  160. .mtrrs          rq      MAX_USEFUL_MTRRS * 2
  161. .local_vars_size = $ - esp
  162. end virtual
  163.  
  164. ; 2. Get the number of variable-range MTRRs from MTRRCAP register.
  165. ; Abort if zero.
  166.         mov     ecx, 0xFE
  167.         rdmsr
  168.         test    al, al
  169.         jz      .abort
  170.         mov     byte [num_variable_mtrrs], al
  171. ; 3. Validate MTRR_DEF_TYPE register.
  172.         mov     ecx, 0x2FF
  173.         rdmsr
  174. ; If BIOS has not initialized variable-range MTRRs, fallback to step 7.
  175.         test    ah, 8
  176.         jz      .fill_ranges_from_memory_map
  177. ; If the default memory type (not covered by MTRRs) is not UC,
  178. ; then probably BIOS did something strange, so it is better to exit immediately
  179. ; hoping for the best.
  180.         cmp     al, MEM_UC
  181.         jnz     .abort
  182. ; 4. Validate all variable-range MTRRs
  183. ; and copy configured MTRRs to the local array [.mtrrs].
  184. ; 4a. Prepare for the loop over existing variable-range MTRRs.
  185.         mov     ecx, 0x200
  186.         lea     edi, [.mtrrs]
  187. .get_used_mtrrs_loop:
  188. ; 4b. For every MTRR, read PHYSBASEn and PHYSMASKn.
  189. ; In PHYSBASEn, clear upper bits and copy to ebp:ebx.
  190.         rdmsr
  191.         or      edx, [.phys_reserved_mask]
  192.         xor     edx, [.phys_reserved_mask]
  193.         mov     ebp, edx
  194.         mov     ebx, eax
  195.         inc     ecx
  196. ; If PHYSMASKn is not active, ignore this MTRR.
  197.         rdmsr
  198.         inc     ecx
  199.         test    ah, 8
  200.         jz      .get_used_mtrrs_next
  201. ; 4c. For every active MTRR, check that number of local entries is not too large.
  202.         inc     [.num_used_mtrrs]
  203.         cmp     [.num_used_mtrrs], MAX_USEFUL_MTRRS
  204.         ja      .abort
  205. ; 4d. For every active MTRR, store PHYSBASEn with upper bits cleared.
  206. ; This contains the MTRR base and the memory type in low byte.
  207.         mov     [edi], ebx
  208.         mov     [edi+4], ebp
  209. ; 4e. For every active MTRR, check that the range is continuous:
  210. ; PHYSMASKn with upper bits set must be negated power of two, and
  211. ; low bits of PHYSBASEn must be zeroes:
  212. ; PHYSMASKn = 1...10...0,
  213. ; PHYSBASEn = x...x0...0,
  214. ; this defines a continuous range from x...x0...0 to x...x1...1,
  215. ; length = 10...0 = negated PHYSMASKn.
  216. ; Store length in the local array.
  217.         and     eax, not 0xFFF
  218.         or      edx, [.phys_reserved_mask]
  219.         mov     dword [edi+8], 0
  220.         mov     dword [edi+12], 0
  221.         sub     [edi+8], eax
  222.         sbb     [edi+12], edx
  223. ; (x and -x) is the maximum power of two that divides x.
  224. ; Condition for powers of two: (x and -x) equals x.
  225.         and     eax, [edi+8]
  226.         and     edx, [edi+12]
  227.         cmp     eax, [edi+8]
  228.         jnz     .abort
  229.         cmp     edx, [edi+12]
  230.         jnz     .abort
  231.         sub     eax, 1
  232.         sbb     edx, 0
  233.         and     eax, not 0xFFF
  234.         and     eax, ebx
  235.         jnz     .abort
  236.         and     edx, ebp
  237.         jnz     .abort
  238. ; 4f. For every active MTRR, validate memory type: it must be either WB or UC.
  239.         add     edi, 16
  240.         cmp     bl, MEM_UC
  241.         jz      .get_used_mtrrs_next
  242.         cmp     bl, MEM_WB
  243.         jnz     .abort
  244. .get_used_mtrrs_next:
  245. ; 4g. Repeat the loop at 4b-4f for all [num_variable_mtrrs] entries.
  246.         mov     eax, [num_variable_mtrrs]
  247.         lea     eax, [0x200+eax*2]
  248.         cmp     ecx, eax
  249.         jb      .get_used_mtrrs_loop
  250. ; 4h. If no active MTRRs were detected, fallback to step 7.
  251.         cmp     [.num_used_mtrrs], 0
  252.         jz      .fill_ranges_from_memory_map
  253.         mov     [.mtrrs_end], edi
  254. ; 5. Generate sorted list of ranges marked as WB.
  255. ; 5a. Prepare for the loop over configured MTRRs filled at step 4.
  256.         lea     ecx, [.mtrrs]
  257. .fill_wb_ranges:
  258. ; 5b. Ignore non-WB MTRRs.
  259.         mov     ebx, [ecx]
  260.         cmp     bl, MEM_WB
  261.         jnz     .next_wb_range
  262.         mov     ebp, [ecx+4]
  263.         and     ebx, not 0xFFF  ; clear memory type and reserved bits
  264. ; ebp:ebx = start of the range described by the current MTRR.
  265. ; 5c. Find the first existing range containing a point greater than ebp:ebx.
  266.         lea     esi, [.first_range]
  267. .find_range_wb:
  268. ; If there is no next range or start of the next range is greater than ebp:ebx,
  269. ; exit the loop to 5d.
  270.         mov     edi, [esi]
  271.         test    edi, edi
  272.         jz      .found_place_wb
  273.         mov     eax, ebx
  274.         mov     edx, ebp
  275.         sub     eax, dword [edi + mtrr_range.start]
  276.         sbb     edx, dword [edi + mtrr_range.start+4]
  277.         jb      .found_place_wb
  278. ; Otherwise, if end of the next range is greater than or equal to ebp:ebx,
  279. ; exit the loop to 5e.
  280.         mov     esi, edi
  281.         sub     eax, dword [edi + mtrr_range.length]
  282.         sbb     edx, dword [edi + mtrr_range.length+4]
  283.         jb      .expand_wb
  284.         or      eax, edx
  285.         jnz     .find_range_wb
  286.         jmp     .expand_wb
  287. .found_place_wb:
  288. ; 5d. ebp:ebx is not within any existing range.
  289. ; Insert a new range between esi and edi.
  290. ; (Later, during 5e, it can be merged with the following ranges.)
  291.         mov     eax, [.first_free_range]
  292.         test    eax, eax
  293.         jz      .abort
  294.         mov     [esi], eax
  295.         mov     edx, [eax + mtrr_range.next]
  296.         mov     [.first_free_range], edx
  297.         mov     dword [eax + mtrr_range.start], ebx
  298.         mov     dword [eax + mtrr_range.start+4], ebp
  299. ; Don't fill [eax+mtrr_range.next] and [eax+mtrr_range.length] yet,
  300. ; they will be calculated including merges at step 5e.
  301.         mov     esi, edi
  302.         mov     edi, eax
  303. .expand_wb:
  304. ; 5e. The range at edi contains ebp:ebx, and esi points to the first range
  305. ; to be checked for merge: esi=edi if ebp:ebx was found in an existing range,
  306. ; esi is next after edi if a new range with ebp:ebx was created.
  307. ; Merge it with following ranges while start of the next range is not greater
  308. ; than the end of the new range.
  309.         add     ebx, [ecx+8]
  310.         adc     ebp, [ecx+12]
  311. ; ebp:ebx = end of the range described by the current MTRR.
  312. .expand_wb_loop:
  313. ; If there is no next range or start of the next range is greater than ebp:ebx,
  314. ; exit the loop to 5g.
  315.         test    esi, esi
  316.         jz      .expand_wb_done
  317.         mov     eax, ebx
  318.         mov     edx, ebp
  319.         sub     eax, dword [esi + mtrr_range.start]
  320.         sbb     edx, dword [esi + mtrr_range.start+4]
  321.         jb      .expand_wb_done
  322. ; Otherwise, if end of the next range is greater than or equal to ebp:ebx,
  323. ; exit the loop to 5f.
  324.         sub     eax, dword [esi + mtrr_range.length]
  325.         sbb     edx, dword [esi + mtrr_range.length+4]
  326.         jb      .expand_wb_last
  327. ; Otherwise, the current range is completely within the new range.
  328. ; Free it and continue the loop.
  329.         mov     edx, [esi + mtrr_range.next]
  330.         cmp     esi, edi
  331.         jz      @f
  332.         mov     eax, [.first_free_range]
  333.         mov     [esi + mtrr_range.next], eax
  334.         mov     [.first_free_range], esi
  335. @@:
  336.         mov     esi, edx
  337.         jmp     .expand_wb_loop
  338. .expand_wb_last:
  339. ; 5f. Start of the new range is inside range described by esi,
  340. ; end of the new range is inside range described by edi.
  341. ; If esi is equal to edi, the new range is completely within
  342. ; an existing range, so proceed to the next range.
  343.         cmp     esi, edi
  344.         jz      .next_wb_range
  345. ; Otherwise, set end of interval at esi to end of interval at edi
  346. ; and free range described by edi.
  347.         mov     ebx, dword [esi + mtrr_range.start]
  348.         mov     ebp, dword [esi + mtrr_range.start+4]
  349.         add     ebx, dword [esi + mtrr_range.length]
  350.         adc     ebp, dword [esi + mtrr_range.length+4]
  351.         mov     edx, [esi + mtrr_range.next]
  352.         mov     eax, [.first_free_range]
  353.         mov     [esi + mtrr_range.next], eax
  354.         mov     [.first_free_range], esi
  355.         mov     esi, edx
  356. .expand_wb_done:
  357. ; 5g. We have found the next range (maybe 0) after merging and
  358. ; the new end of range (maybe ebp:ebx from the new range
  359. ; or end of another existing interval calculated at step 5f).
  360. ; Write them to range at edi.
  361.         mov     [edi + mtrr_range.next], esi
  362.         sub     ebx, dword [edi + mtrr_range.start]
  363.         sbb     ebp, dword [edi + mtrr_range.start+4]
  364.         mov     dword [edi + mtrr_range.length], ebx
  365.         mov     dword [edi + mtrr_range.length+4], ebp
  366. .next_wb_range:
  367. ; 5h. Continue the loop 5b-5g over all configured MTRRs.
  368.         add     ecx, 16
  369.         cmp     ecx, [.mtrrs_end]
  370.         jb      .fill_wb_ranges
  371. ; 6. Exclude all ranges marked as UC.
  372. ; 6a. Prepare for the loop over configured MTRRs filled at step 4.
  373.         lea     ecx, [.mtrrs]
  374. .fill_uc_ranges:
  375. ; 6b. Ignore non-UC MTRRs.
  376.         mov     ebx, [ecx]
  377.         cmp     bl, MEM_UC
  378.         jnz     .next_uc_range
  379.         mov     ebp, [ecx+4]
  380.         and     ebx, not 0xFFF  ; clear memory type and reserved bits
  381. ; ebp:ebx = start of the range described by the current MTRR.
  382.         lea     esi, [.first_range]
  383. ; 6c. Find the first existing range containing a point greater than ebp:ebx.
  384. .find_range_uc:
  385. ; If there is no next range, ignore this MTRR,
  386. ; exit the loop and continue to next MTRR.
  387.         mov     edi, [esi]
  388.         test    edi, edi
  389.         jz      .next_uc_range
  390. ; If start of the next range is greater than or equal to ebp:ebx,
  391. ; exit the loop to 6e.
  392.         mov     eax, dword [edi + mtrr_range.start]
  393.         mov     edx, dword [edi + mtrr_range.start+4]
  394.         sub     eax, ebx
  395.         sbb     edx, ebp
  396.         jnb     .truncate_uc
  397. ; Otherwise, continue the loop if end of the next range is less than ebp:ebx,
  398. ; exit the loop to 6d otherwise.
  399.         mov     esi, edi
  400.         add     eax, dword [edi + mtrr_range.length]
  401.         adc     edx, dword [edi + mtrr_range.length+4]
  402.         jnb     .find_range_uc
  403. ; 6d. ebp:ebx is inside (or at end of) an existing range.
  404. ; Split the range. (The second range, maybe containing completely within UC-range,
  405. ; maybe of zero length, can be removed at step 6e, if needed.)
  406.         mov     edi, [.first_free_range]
  407.         test    edi, edi
  408.         jz      .abort
  409.         mov     dword [edi + mtrr_range.start], ebx
  410.         mov     dword [edi + mtrr_range.start+4], ebp
  411.         mov     dword [edi + mtrr_range.length], eax
  412.         mov     dword [edi + mtrr_range.length+4], edx
  413.         mov     eax, [edi + mtrr_range.next]
  414.         mov     [.first_free_range], eax
  415.         mov     eax, [esi + mtrr_range.next]
  416.         mov     [edi + mtrr_range.next], eax
  417. ; don't change [esi+mtrr_range.next] yet, it will be filled at step 6e
  418.         mov     eax, ebx
  419.         mov     edx, ebp
  420.         sub     eax, dword [esi + mtrr_range.start]
  421.         sbb     edx, dword [esi + mtrr_range.start+4]
  422.         mov     dword [esi + mtrr_range.length], eax
  423.         mov     dword [esi + mtrr_range.length+4], edx
  424. .truncate_uc:
  425. ; 6e. edi is the first range after ebp:ebx, check it and next ranges
  426. ; for intersection with the new range, truncate heads.
  427.         add     ebx, [ecx+8]
  428.         adc     ebp, [ecx+12]
  429. ; ebp:ebx = end of the range described by the current MTRR.
  430. .truncate_uc_loop:
  431. ; If start of the next range is greater than ebp:ebx,
  432. ; exit the loop to 6g.
  433.         mov     eax, ebx
  434.         mov     edx, ebp
  435.         sub     eax, dword [edi + mtrr_range.start]
  436.         sbb     edx, dword [edi + mtrr_range.start+4]
  437.         jb      .truncate_uc_done
  438. ; Otherwise, if end of the next range is greater than ebp:ebx,
  439. ; exit the loop to 6f.
  440.         sub     eax, dword [edi + mtrr_range.length]
  441.         sbb     edx, dword [edi + mtrr_range.length+4]
  442.         jb      .truncate_uc_last
  443. ; Otherwise, the current range is completely within the new range.
  444. ; Free it and continue the loop if there is a next range.
  445. ; If that was a last range, exit the loop to 6g.
  446.         mov     edx, [edi + mtrr_range.next]
  447.         mov     eax, [.first_free_range]
  448.         mov     [.first_free_range], edi
  449.         mov     [edi + mtrr_range.next], eax
  450.         mov     edi, edx
  451.         test    edi, edi
  452.         jnz     .truncate_uc_loop
  453.         jmp     .truncate_uc_done
  454. .truncate_uc_last:
  455. ; 6f. The range at edi partially intersects with the UC-range described by MTRR.
  456. ; Truncate it from the head.
  457.         mov     dword [edi + mtrr_range.start], ebx
  458.         mov     dword [edi + mtrr_range.start+4], ebp
  459.         neg     eax
  460.         adc     edx, 0
  461.         neg     edx
  462.         mov     dword [edi + mtrr_range.length], eax
  463.         mov     dword [edi + mtrr_range.length+4], edx
  464. .truncate_uc_done:
  465. ; 6g. We have found the next range (maybe 0) after intersection.
  466. ; Write it to [esi+mtrr_range.next].
  467.         mov     [esi + mtrr_range.next], edi
  468. .next_uc_range:
  469. ; 6h. Continue the loop 6b-6g over all configured MTRRs.
  470.         add     ecx, 16
  471.         cmp     ecx, [.mtrrs_end]
  472.         jb      .fill_uc_ranges
  473. ; Sanity check: if there are no ranges after steps 5-6,
  474. ; fallback to step 7. Otherwise, go to 8.
  475.         cmp     [.first_range], 0
  476.         jnz     .ranges_ok
  477. .fill_ranges_from_memory_map:
  478. ; 7. BIOS has not configured variable-range MTRRs.
  479. ; Create one range from 0 to [MEM_AMOUNT].
  480.         mov     eax, [.first_free_range]
  481.         mov     edx, [eax + mtrr_range.next]
  482.         mov     [.first_free_range], edx
  483.         mov     [.first_range], eax
  484.         xor     edx, edx
  485.         mov     [eax + mtrr_range.next], edx
  486.         mov     dword [eax + mtrr_range.start], edx
  487.         mov     dword [eax + mtrr_range.start+4], edx
  488.         mov     ecx, [MEM_AMOUNT]
  489.         mov     dword [eax + mtrr_range.length], ecx
  490.         mov     dword [eax + mtrr_range.length+4], edx
  491. .ranges_ok:
  492. ; 8. We have calculated list of WB-ranges.
  493. ; Now we should calculate a list of MTRRs so that
  494. ; * every MTRR describes a range with length = power of 2 and start that is aligned,
  495. ; * every MTRR can be WB or UC
  496. ; * (sum of all WB ranges) minus (sum of all UC ranges) equals the calculated list
  497. ; * top of 4G memory must not be covered by any ranges
  498. ; Example: range [0,0xBC000000) can be converted to
  499. ; [0,0x80000000)+[0x80000000,0xC0000000)-[0xBC000000,0xC0000000)
  500. ; WB            +WB                     -UC
  501. ; but not to [0,0x100000000)-[0xC0000000,0x100000000)-[0xBC000000,0xC0000000).
  502. ; 8a. Check that list of ranges is [0,something) plus, optionally, [4G,something).
  503. ; This holds in practice (see mtrrtest.asm for real-life examples)
  504. ; and significantly simplifies the code: ranges are independent, start of range
  505. ; is almost always aligned (the only exception >4G upper memory can be easily covered),
  506. ; there is no need to consider adding holes before start of range, only
  507. ; append them to end of range.
  508.         xor     eax, eax
  509.         mov     edi, [.first_range]
  510.         cmp     dword [edi + mtrr_range.start], eax
  511.         jnz     .abort
  512.         cmp     dword [edi + mtrr_range.start+4], eax
  513.         jnz     .abort
  514.         cmp     dword [edi + mtrr_range.length+4], eax
  515.         jnz     .abort
  516.         mov     edx, [edi + mtrr_range.next]
  517.         test    edx, edx
  518.         jz      @f
  519.         cmp     dword [edx + mtrr_range.start], eax
  520.         jnz     .abort
  521.         cmp     dword [edx + mtrr_range.start+4], 1
  522.         jnz     .abort
  523.         cmp     [edx + mtrr_range.next], eax
  524.         jnz     .abort
  525. @@:
  526. ; 8b. Initialize: no MTRRs filled.
  527.         mov     [.num_used_mtrrs], eax
  528.         lea     esi, [.mtrrs]
  529. .range2mtrr_loop:
  530. ; 8c. If we are dealing with upper-memory range (after 4G)
  531. ; with length > start, create one WB MTRR with [start,2*start),
  532. ; reset start to 2*start and return to this step.
  533. ; Example: [4G,24G) -> [4G,8G) {returning} + [8G,16G) {returning}
  534. ; + [16G,24G) {advancing to ?}.
  535.         mov     eax, dword [edi + mtrr_range.length+4]
  536.         test    eax, eax
  537.         jz      .less4G
  538.         mov     edx, dword [edi + mtrr_range.start+4]
  539.         cmp     eax, edx
  540.         jb      .start_aligned
  541.         inc     [.num_used_mtrrs]
  542.         cmp     [.num_used_mtrrs], MAX_USEFUL_MTRRS
  543.         ja      .abort
  544.         mov     dword [esi], MEM_WB
  545.         mov     dword [esi+4], edx
  546.         mov     dword [esi+8], 0
  547.         mov     dword [esi+12], edx
  548.         add     esi, 16
  549.         add     dword [edi + mtrr_range.start+4], edx
  550.         sub     dword [edi + mtrr_range.length+4], edx
  551.         jnz     .range2mtrr_loop
  552.         cmp     dword [edi + mtrr_range.length], 0
  553.         jz      .range2mtrr_next
  554. .less4G:
  555. ; 8d. If we are dealing with low-memory range (before 4G)
  556. ; and appending a maximal-size hole would create a range covering top of 4G,
  557. ; create a maximal-size WB range and return to this step.
  558. ; Example: for [0,0xBC000000) the following steps would consider
  559. ; variants [0,0x80000000)+(another range to be splitted) and
  560. ; [0,0x100000000)-(another range to be splitted); we forbid the last variant,
  561. ; so the first variant must be used.
  562.         bsr     ecx, dword [edi + mtrr_range.length]
  563.         xor     edx, edx
  564.         inc     edx
  565.         shl     edx, cl
  566.         lea     eax, [edx*2]
  567.         add     eax, dword [edi + mtrr_range.start]
  568.         jnz     .start_aligned
  569.         inc     [.num_used_mtrrs]
  570.         cmp     [.num_used_mtrrs], MAX_USEFUL_MTRRS
  571.         ja      .abort
  572.         mov     eax, dword [edi + mtrr_range.start]
  573.         mov     dword [esi], eax
  574.         or      dword [esi], MEM_WB
  575.         mov     dword [esi+4], 0
  576.         mov     dword [esi+8], edx
  577.         mov     dword [esi+12], 0
  578.         add     esi, 16
  579.         add     dword [edi + mtrr_range.start], edx
  580.         sub     dword [edi + mtrr_range.length], edx
  581.         jnz     .less4G
  582.         jmp     .range2mtrr_next
  583. .start_aligned:
  584. ; Start is aligned for any allowed length, maximum-size hole is allowed.
  585. ; Select the best MTRR configuration for one range.
  586. ; length=...101101
  587. ; Without hole at the end, we need one WB MTRR for every 1-bit in length:
  588. ; length=...100000 + ...001000 + ...000100 + ...000001
  589. ; We can also append one hole at the end so that one 0-bit (selected by us)
  590. ; becomes 1 and all lower bits become 0 for WB-range:
  591. ; length=...110000 - (...00010 + ...00001)
  592. ; In this way, we need one WB MTRR for every 1-bit higher than the selected bit,
  593. ; one WB MTRR for the selected bit, one UC MTRR for every 0-bit between
  594. ; the selected bit and lowest 1-bit (they become 1-bits after negation)
  595. ; and one UC MTRR for lowest 1-bit.
  596. ; So we need to select 0-bit with the maximal difference
  597. ; (number of 0-bits) - (number of 1-bits) between selected and lowest 1-bit,
  598. ; this equals the gain from using a hole. If the difference is negative for
  599. ; all 0-bits, don't append hole.
  600. ; Note that lowest 1-bit is not included when counting, but selected 0-bit is.
  601. ; 8e. Find the optimal bit position for hole.
  602. ; eax = current difference, ebx = best difference,
  603. ; ecx = hole bit position, edx = current bit position.
  604.         xor     eax, eax
  605.         xor     ebx, ebx
  606.         xor     ecx, ecx
  607.         bsf     edx, dword [edi + mtrr_range.length]
  608.         jnz     @f
  609.         bsf     edx, dword [edi + mtrr_range.length+4]
  610.         add     edx, 32
  611. @@:
  612.         push    edx     ; save position of lowest 1-bit for step 8f
  613. .calc_stat:
  614.         inc     edx
  615.         cmp     edx, 64
  616.         jae     .stat_done
  617.         inc     eax     ; increment difference in hope for 1-bit
  618. ; Note: bt conveniently works with both .length and .length+4,
  619. ; depending on whether edx>=32.
  620.         bt      dword [edi + mtrr_range.length], edx
  621.         jc      .calc_stat
  622.         dec     eax     ; hope was wrong, decrement difference to correct 'inc'
  623.         dec     eax     ; and again, now getting the real difference
  624.         cmp     eax, ebx
  625.         jle     .calc_stat
  626.         mov     ebx, eax
  627.         mov     ecx, edx
  628.         jmp     .calc_stat
  629. .stat_done:
  630. ; 8f. If we decided to create a hole, flip all bits between lowest and selected.
  631.         pop     edx     ; restore position of lowest 1-bit saved at step 8e
  632.         test    ecx, ecx
  633.         jz      .fill_hi_init
  634. @@:
  635.         inc     edx
  636.         cmp     edx, ecx
  637.         ja      .fill_hi_init
  638.         btc     dword [edi + mtrr_range.length], edx
  639.         jmp     @b
  640. .fill_hi_init:
  641. ; 8g. Create MTRR ranges corresponding to upper 32 bits.
  642.         sub     ecx, 32
  643. .fill_hi_loop:
  644.         bsr     edx, dword [edi + mtrr_range.length+4]
  645.         jz      .fill_hi_done
  646.         inc     [.num_used_mtrrs]
  647.         cmp     [.num_used_mtrrs], MAX_USEFUL_MTRRS
  648.         ja      .abort
  649.         mov     eax, dword [edi + mtrr_range.start]
  650.         mov     [esi], eax
  651.         mov     eax, dword [edi + mtrr_range.start+4]
  652.         mov     [esi+4], eax
  653.         xor     eax, eax
  654.         mov     [esi+8], eax
  655.         bts     eax, edx
  656.         mov     [esi+12], eax
  657.         cmp     edx, ecx
  658.         jl      .fill_hi_uc
  659.         or      dword [esi], MEM_WB
  660.         add     dword [edi + mtrr_range.start+4], eax
  661.         jmp     @f
  662. .fill_hi_uc:
  663.         sub     dword [esi+4], eax
  664.         sub     dword [edi + mtrr_range.start+4], eax
  665. @@:
  666.         add     esi, 16
  667.         sub     dword [edi + mtrr_range.length], eax
  668.         jmp     .fill_hi_loop
  669. .fill_hi_done:
  670. ; 8h. Create MTRR ranges corresponding to lower 32 bits.
  671.         add     ecx, 32
  672. .fill_lo_loop:
  673.         bsr     edx, dword [edi+mtrr_range.length]
  674.         jz      .range2mtrr_next
  675.         inc     [.num_used_mtrrs]
  676.         cmp     [.num_used_mtrrs], MAX_USEFUL_MTRRS
  677.         ja      .abort
  678.         mov     eax, dword [edi + mtrr_range.start]
  679.         mov     [esi], eax
  680.         mov     eax, dword [edi + mtrr_range.start+4]
  681.         mov     [esi+4], eax
  682.         xor     eax, eax
  683.         mov     [esi+12], eax
  684.         bts     eax, edx
  685.         mov     [esi+8], eax
  686.         cmp     edx, ecx
  687.         jl      .fill_lo_uc
  688.         or      dword [esi], MEM_WB
  689.         add     dword [edi + mtrr_range.start], eax
  690.         jmp     @f
  691. .fill_lo_uc:
  692.         sub     dword [esi], eax
  693.         sub     dword [edi + mtrr_range.start], eax
  694. @@:
  695.         add     esi, 16
  696.         sub     dword [edi + mtrr_range.length], eax
  697.         jmp     .fill_lo_loop
  698. .range2mtrr_next:
  699. ; 8i. Repeat the loop at 8c-8h for all ranges.
  700.         mov     edi, [edi + mtrr_range.next]
  701.         test    edi, edi
  702.         jnz     .range2mtrr_loop
  703. ; 9. We have calculated needed MTRRs, now setup them in the CPU.
  704. ; 9a. Abort if number of MTRRs is too large.
  705.         mov     eax, [num_variable_mtrrs]
  706.         cmp     [.num_used_mtrrs], eax
  707.         ja      .abort
  708.  
  709. ; 9b. Prepare for changes.
  710.         call    mtrr_begin_change
  711.  
  712. ; 9c. Prepare for loop over MTRRs.
  713.         lea     esi, [.mtrrs]
  714.         mov     ecx, 0x200
  715. @@:
  716. ; 9d. For every MTRR, copy PHYSBASEn as is: step 8 has configured
  717. ; start value and type bits as needed.
  718.         mov     eax, [esi]
  719.         mov     edx, [esi+4]
  720.         wrmsr
  721.         inc     ecx
  722. ; 9e. For every MTRR, calculate PHYSMASKn = -(length) or 0x800
  723. ; with upper bits cleared, 0x800 = MTRR is valid.
  724.         xor     eax, eax
  725.         xor     edx, edx
  726.         sub     eax, [esi+8]
  727.         sbb     edx, [esi+12]
  728.         or      eax, 0x800
  729.         or      edx, [.phys_reserved_mask]
  730.         xor     edx, [.phys_reserved_mask]
  731.         wrmsr
  732.         inc     ecx
  733. ; 9f. Continue steps 9d and 9e for all MTRRs calculated at step 8.
  734.         add     esi, 16
  735.         dec     [.num_used_mtrrs]
  736.         jnz     @b
  737. ; 9g. Zero other MTRRs.
  738.         xor     eax, eax
  739.         xor     edx, edx
  740.         mov     ebx, [num_variable_mtrrs]
  741.         lea     ebx, [0x200+ebx*2]
  742. @@:
  743.         cmp     ecx, ebx
  744.         jae     @f
  745.         wrmsr
  746.         inc     ecx
  747.         wrmsr
  748.         inc     ecx
  749.         jmp     @b
  750. @@:
  751.  
  752. ; 9i. Changes are done.
  753.         call    mtrr_end_change
  754.  
  755. .abort:
  756.         add     esp, .local_vars_size + MAX_RANGES * sizeof.mtrr_range
  757.         pop     ebp
  758.         ret
  759. endp
  760.  
; Allocate&set one MTRR for given range.
; size must be power of 2 that divides base (hardware requirement for
; variable-range PHYSBASE/PHYSMASK pairs).
; in: base = 32-bit physical start address,
;     size = length of the range in bytes,
;     mem_type = memory type for the range (e.g. MEM_WC)
; If no free variable-range MTRR exists, or base is already configured
; in some MTRR, the call is silently ignored.
proc set_mtrr stdcall, base:dword,size:dword,mem_type:dword
; find unused register
        mov     ecx, 0x201              ; MSR index of IA32_MTRR_PHYSMASK0
.scan:
        mov     eax, [num_variable_mtrrs]
        lea     eax, [0x200+eax*2]      ; first MSR index past the last PHYSBASEn
        cmp     ecx, eax
        jae     .ret                    ; scanned all variable MTRRs, none free
        rdmsr                           ; edx:eax = PHYSMASKn
        dec     ecx                     ; ecx = MSR index of PHYSBASEn
        test    ah, 8                   ; PHYSMASK bit 11 = "valid"
        jz      .found                  ; entry not valid -> free, use it
        rdmsr                           ; edx:eax = PHYSBASEn of an in-use entry
        test    edx, edx
        jnz     @f                      ; base above 4 GiB cannot equal our 32-bit base
        and     eax, not 0xFFF  ; clear reserved bits
        cmp     eax, [base]
        jz      .ret                    ; this MTRR already starts at base: nothing to do
@@:
        add     ecx, 3                  ; PHYSBASEn + 3 = PHYSMASK(n+1)
        jmp     .scan
; no free registers, ignore the call
.ret:
        ret
.found:
; found, write values
        push    ecx                     ; keep PHYSBASEn index across the helper call
        call    mtrr_begin_change       ; disable caching/MTRRs before reprogramming
        pop     ecx
        xor     edx, edx
        mov     eax, [base]
        or      eax, [mem_type]         ; PHYSBASEn = base | memory type (bits 0-7)
        wrmsr

; Build PHYSMASKn = 2^cpu_phys_addr_width - size, i.e. the mask covering
; "size" bytes with all implemented address bits above it set.
        mov     al, [cpu_phys_addr_width] ; only al is set; upper eax bits are stale,
                                        ; but `bts r32` uses the offset mod 32, so
                                        ; only width mod 32 matters (width is 32..63)
        xor     edx, edx
        bts     edx, eax                ; edx = 1 shl (width-32) => edx:eax = 2^width
        xor     eax, eax
        sub     eax, [size]
        sbb     edx, 0                  ; edx:eax = 2^width - size
        or      eax, 0x800              ; bit 11 = "valid"
        inc     ecx                     ; ecx = MSR index of PHYSMASKn
        wrmsr
        call    mtrr_end_change         ; re-enable caching/MTRRs
        ret
endp
  809.  
; Helper procedure for mtrr_validate.
; Calculates memory type for given address according to variable-range MTRRs.
; Assumes that MTRRs are enabled and [num_variable_mtrrs] > 0
; (the first rdmsr below runs before the loop bound is checked).
; in: ebx = 32-bit physical address
; out: eax = memory type for ebx
proc mtrr_get_real_type
; 1. Initialize: we have not yet found any MTRRs covering ebx.
        push    0                       ; [esp] = bitmask of types from matching MTRRs
        mov     ecx, 0x201              ; MSR index of IA32_MTRR_PHYSMASK0
.mtrr_loop:
; 2. For every MTRR, check whether it is valid; if not, continue to the next MTRR.
        rdmsr                           ; edx:eax = PHYSMASKn
        dec     ecx                     ; ecx = MSR index of PHYSBASEn
        test    ah, 8                   ; PHYSMASK bit 11 = "valid"
        jz      .next
; 3. For every valid MTRR, check whether (ebx and PHYSMASKn) == PHYSBASEn,
; excluding low 12 bits.
        and     eax, ebx                ; eax = ebx and PHYSMASKn (low 32 bits)
        push    eax
        rdmsr                           ; edx:eax = PHYSBASEn
        test    edx, edx
        pop     edx                     ; edx = ebx and PHYSMASKn (flags still from test)
        jnz     .next                   ; base above 4 GiB cannot cover a 32-bit address
        xor     edx, eax
        and     edx, not 0xFFF          ; bits 0-11 of PHYSBASE hold type/reserved, ignore
        jnz     .next
; 4. If so, set the bit corresponding to memory type defined by this MTRR.
        and     eax, 7                  ; memory type = PHYSBASEn bits 0-2
        bts     [esp], eax
.next:
; 5. Continue loop at 2-4 for all variable-range MTRRs.
        add     ecx, 3                  ; PHYSBASEn + 3 = PHYSMASK(n+1)
        mov     eax, [num_variable_mtrrs]
        lea     eax, [0x200+eax*2]      ; first MSR index past the last PHYSBASEn
        cmp     ecx, eax
        jb      .mtrr_loop
; 6. If no MTRRs cover address in ebx, use default MTRR type from MTRR_DEF_CAP.
        pop     edx                     ; edx = accumulated type bitmask
        test    edx, edx
        jz      .default
; 7. Find&clear 1-bit in edx.
        bsf     eax, edx
        btr     edx, eax
; 8. If there was only one 1-bit, then all MTRRs are consistent, return that bit.
        test    edx, edx
        jz      .nothing
; Otherwise, return MEM_UC (e.g. WB+UC is UC).
        xor     eax, eax                ; MEM_UC = 0
.nothing:
        ret
.default:
        mov     ecx, 0x2FF              ; MSR index of IA32_MTRR_DEF_TYPE
        rdmsr
        movzx   eax, al                 ; default memory type = bits 0-7
        ret
endp
  866.  
; If MTRRs are configured improperly, this is not obvious to the user;
; everything works, but the performance can be horrible.
; Try to detect this and let the user know that the low performance
; is caused by some problem and is not a global property of the system.
; Let's hope he would report it to developers...
; Runs sanity checks on the effective memory types; on failure launches
; the user notification application with mtrr_user_message.
proc mtrr_validate
; 1. If MTRRs are not supported, they cannot be configured improperly.
; Note: VirtualBox claims MTRR support in cpuid, but emulates MTRRCAP=0,
; which is efficiently equivalent to absent MTRRs.
; So check [num_variable_mtrrs] instead of CAPS_MTRR in [cpu_caps].
        cmp     [num_variable_mtrrs], 0
        jz      .exit
; 2. If variable-range MTRRs are not configured, this is a problem.
        mov     ecx, 0x2FF              ; MSR index of IA32_MTRR_DEF_TYPE
        rdmsr
        test    ah, 8                   ; bit 11 = MTRRs globally enabled
        jz      .fail
; 3. Get the memory type for address somewhere inside working memory.
; It must be write-back.
        mov     ebx, 0x27FFFF           ; arbitrary address inside low working RAM
        call    mtrr_get_real_type
        cmp     al, MEM_WB
        jnz     .fail
; 4. If we're using a mode with LFB,
; get the memory type for last pixel of the framebuffer.
; It must be write-combined.
        test    word [SCR_MODE], 0x4000 ; LFB-mode flag in the video mode word
        jz      .exit
        mov     eax, [_display.lfb_pitch]
        mul     [_display.height]
        dec     eax                     ; eax = offset of the last framebuffer byte
; LFB is mapped to virtual address LFB_BASE,
; it uses global pages if supported by CPU.
; Translate LFB_BASE to a physical address through the kernel page tables.
        mov     ebx, [sys_proc + PROC.pdt_0 + (LFB_BASE shr 20)] ; PDE for LFB_BASE
        test    ebx, PDE_LARGE
        jnz     @f                      ; 4 MiB page: PDE already holds the frame address
        mov     ebx, [page_tabs+(LFB_BASE shr 10)] ; otherwise read the PTE
@@:
        and     ebx, not 0xFFF          ; drop page attribute bits
        add     ebx, eax                ; ebx = physical address of the last pixel
        call    mtrr_get_real_type
        cmp     al, MEM_WC
        jz      .exit
; 5. The check at step 4 fails on Bochs:
; Bochs BIOS configures MTRRs in a strange way not respecting [cpu_phys_addr_width],
; so mtrr_reconfigure avoids to touch anything.
; However, Bochs core ignores MTRRs (keeping them only for rdmsr/wrmsr),
; so we don't care about proper setting for Bochs.
; Use northbridge PCI id to detect Bochs: it emulates either i440fx or i430fx
; depending on configuration file.
        mov     eax, [pcidev_list.fd]
        cmp     eax, pcidev_list        ; sanity check: fail if no PCI devices
        jz      .fail
        cmp     [eax + PCIDEV.vendor_device_id], 0x12378086 ; Intel i440FX
        jz      .exit
        cmp     [eax + PCIDEV.vendor_device_id], 0x01228086 ; Intel i430FX
        jnz     .fail
.exit:
        ret
.fail:
; Notify the user about the misconfiguration.
        mov     ebx, mtrr_user_message
        mov     ebp, notifyapp
        call    fs_execute_from_sysdir_param
        ret
endp
  932.