Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. ; $$$$$$$$$$$$$$$$$$$ ABAKIS $$$$$$$$$$$$$$$$$$$$$  
  2. ; *************** STAR^2 SOFTWARE ****************
  3. ; ????????????????? MEMORY.INC ???????????????????
  4.  
  5. ; allocate n          ; see SYSTEM.INC
  6. ; allocate.p p, n
  7. ; destroy p
  8.  
  9. ; memory.set p, v, n  ; 32BIT set/copy/zero
  10. ; memory.copy a, b, n
  11. ; memory.zero p, n
  12.  
  13. ; get.bit v, n   ; get/set/zero bit
  14. ; set.bit v, n
  15. ; zero.bit v, n
  16. ; enable.f v, n  ; enable/disable flag
  17. ; disable.f v, n
  18.  
  19. ; power.2 n      ; is power of 2? which one?
  20. ; align.n n, p   ; versatile align n to p
  21.  
  22. ;;;;;;;;;;;;; MEMORY COPY, SET, ZERO ;;;;;;;;;;;;;
  23.  
  24. ; portable. 32BIT
  25.  
  26. function memory.copy, a, b, n   ; copy n bytes from b to a, 32 BITs at a time
  27.   alias p=r0, q=r1              ; p=destination cursor, q=source cursor
  28.   . p=a, q=b, n>>>2             ; n becomes the dword count; the n%4 tail bytes are not copied
  29.   loop n, (u32) *p++=*q++, endl ; one u32 transfer per pass, both cursors advance
  30. endf
  31.  
  32. function memory.set, a, b, n    ; store the 32BIT value b into n/4 dwords at a
  33.   alias p=r0, v=r1, x=r2        ; p=destination cursor, v=value; x is not referenced below
  34.   . p=a, v=b, n>>>2             ; n becomes the dword count; the n%4 tail bytes are not written
  35.   loop n, (u32) *p++=v, endl    ; b is stored as is, a byte value is not replicated
  36. endf
  37.  
  38. macro memory.zero p, n { memory.set p, 0, n } ; zero n/4 dwords at p through memory.set
  39.  
  40. ; x86 specific. aligned. copy n bytes from b to a; nothing is copied if a=0 or b=0
  41. function memory.copy.x, a, b, n
  42.   push r6 r7
  43.   . r7=a, r6=b, r1=n
  44.   test r7, r7    ; destination=0?
  45.   jz .e
  46.   test r6, r6    ; source=0? tested on its own: ANDing both addresses rejects valid pairs with no common BITs
  47.   jz .e
  48.   cmp r1, 4      ; if n<4
  49.   jb @f
  50.   push r1
  51.   shr r1, 2      ; n/4
  52.   rep movsd      ; copy dwords
  53.   pop r1
  54.   and r1, 3      ; modulo 4
  55.   jz .e          ; remainder?
  56.   @@:
  57.   rep movsb      ; copy bytes
  58.   .e:
  59.   pop r7 r6
  60. endf
  61.  
  62. function memory.set.x, p, v, n ; fill n bytes at p with the low byte of v; nothing is stored if p=0
  63.   push r7
  64.   . r7=p, r0=v, r0&0FFh,\ ; keep one byte: a wider v would give dwords that differ from the tail bytes
  65.    r0*01010101h,\         ; replicate it into all 4 bytes of r0
  66.    r1=n
  67.   test r7, r7 ; address=0?
  68.   jz .e
  69.   cmp r1, 4   ; n<4?
  70.   jb @f
  71.   push r1
  72.   shr r1, 2   ; n/4
  73.   rep stosd   ; store dwords
  74.   pop r1
  75.   and r1, 3   ; modulo 4
  76.   jz .e       ; remainder?
  77.   @@:
  78.   rep stosb   ; store bytes
  79.   .e:
  80.   pop r7
  81. endf
  82.  
  83. ;;;;;;;;;;;;;;;; GET/SET/ZERO BIT ;;;;;;;;;;;;;;;;
  84.  
  85. ; 76543210. warning: r0/r1/r2 cannot be used
  86. ; as parameters. 'v' should be m, 'i' can be m/i
  87.  
  88. macro get.bit v, i {  ; r0=(v>>i)&1
  89.  . r0=v, r1=i, r0>>cl, r0&1 ; the shift count is taken from cl after r1=i. r0 and r1 are overwritten
  90. }
  91.  
  92. macro set.bit v, i {  ; v|=(1<<i)
  93.  . r0=1, r1=i, r0<<cl, v|r0 ; r0=mask with only BIT i set, then OR it into v. r0 and r1 are overwritten
  94. }
  95.  
  96. macro zero.bit v, i { ; v&=~(1<<i)
  97.  . r0=1, r1=i, r0<<cl, not r0, v&r0 ; r0=mask with only BIT i clear, then AND it into v. r0 and r1 are overwritten
  98. }
  99.  
  100. ; 1111.0000
  101.  
  102. macro get.nibble v, i { ; r0=(v>>(i*4))&1111b
  103.  . r0=v, r1=i, r1<<2, r0>>cl, r0&1111b ; r1<<2 turns the nibble index into a BIT offset. r0 and r1 are overwritten
  104. }
  105.  
  106. macro set.nibble v, i, n { ; v|=(n<<(i*4))
  107.  . r0=v, r1=i, r2=n, r1<<2, r2<<cl,\ ; r2=n moved up to nibble i
  108.  r0|r2, v=r0 ; OR only: BITs already set in that nibble stay set. r0, r1 and r2 are overwritten
  109. }
  110.  
  111. ; 33.22.11.00
  112.  
  113. macro get.couple v, i { ; r0=(v>>(i*2))&11b
  114.  . r0=v, r1=i, r1<<1, r0>>cl, r0&11b ; r1<<1 turns the couple index into a BIT offset. r0 and r1 are overwritten
  115. }
  116.  
  117. macro set.couple v, i, n { ; v|=(n<<(i*2))
  118.  . r0=v, r1=i, r2=n, r1<<1, r2<<cl,\ ; r2=n moved up to couple i
  119.  r0|r2, v=r0 ; OR only: BITs already set in that couple stay set. r0, r1 and r2 are overwritten
  120. }
  121.  
  122. ; enable/disable flag
  123.  
  124. macro enable.f v, n { . r0=n, v|r0 } ; v|=n. r0 is overwritten
  125.  
  126. macro disable.f v, n
  127.  { . r0=n, not r0, v&r0 } ; v&=~n. r0 is overwritten
  128.  
  129. macro toggle n { xor n, 1 } ; invert 1/0: n^=1, only BIT 0 changes
  130.  
  131. ; create AA.BBB.CCCb/AA.BB.CC.DDb BIT structures
  132.  
  133. function triplet, a, b, c ; r0=(a<<6)|(b<<3)|c, the AA.BBB.CCCb layout
  134.   . r0=a, r0<<6, r1=b, r1<<3, r0|r1, r0|c ; inputs are not masked to their field widths
  135. endf
  136.  
  137. function quadruplet, a, b, c, d ; r0=(a<<6)|(b<<4)|(c<<2)|d, the AA.BB.CC.DDb layout
  138.   . r0=a, r0<<6, r1=b, r1<<4 ; position the two upper fields
  139.   . r2=c, r2<<2, r0|r1, r0|r2, r0|d ; merge; inputs are not masked to their field widths
  140. endf
  141.  
  142. ; reverse byte order
  143.  
  144. macro reverse.32 n
  145.  { . r0=n, bswap r0 } ; r0=n with its 4 bytes in reverse order
  146.  
  147. macro reverse.24 n
  148.  { . r0=n, bswap r0, r0>>>8 } ; r0=the low 3 bytes of n reversed: bswap parks the top byte of n at the bottom and >>>8 drops it
  149.  
  150. macro reverse.16 n
  151.  { . r0=n, cl=al, al=ah, ah=cl } ; swap the 2 low bytes of r0 through cl. cl is overwritten, the upper 16 BITs of r0 stay as loaded from n
  152.  
  153. ;;;;;;;;;;;;;;;;;; POWERS OF 2 ;;;;;;;;;;;;;;;;;;;
  154.  
  155. ; an unsigned number is a power of 2 if only
  156. ; 1 BIT is set: if !(n&(n-1)). subtracting 1
  157. ; flips the lowest 1 BIT and all 0s below it.
  158. ; if n=10000000b (80h/128), n&01111111b=0
  159.  
  160. ; to find out which power of 2, search n-1
  161. ; for 1st 0 BIT from right to left
  162.  
  163. ; is n power of 2? example: power.2 128
  164. ; returns 7
  165.  
  166. function power.2, n ; r0=exponent if n is a power of 2, else 0
  167.   locals i          ; i=BIT index being probed
  168.   . r0=n
  169.   if r0<2, go .r0, end ; n<2 returns 0, so 1 (2^0) looks the same as "not a power of 2"
  170.   . r1=r0, r1-1, r0&r1 ; r0=n&(n-1)
  171.   test r0, r0
  172.   jnz .r0              ; more than 1 BIT set: not a power of 2
  173.   . n--, i=1           ; n-1 has every BIT below the single set BIT of n set
  174.   @@:
  175.    . r0=1, r1=i, r0<<cl, i++ ; r0=1<<i, then step i
  176.    test n, r0
  177.   jnz @b               ; keep going while BIT i of n-1 is 1
  178.   . r0=i, r0--         ; i went 1 past the first 0 BIT, whose index is the exponent
  179.   jmp @f
  180.   .r0: . r0=0          ; failure result
  181.   @@:
  182. endf
  183.  
  184. ;;;;;;;;;;;;;;;;;;;;; ALIGN ;;;;;;;;;;;;;;;;;;;;;;
  185.  
  186. ; versatile align n/umber by power of 2
  187.  
  188. ; return n aligned to p in r0. in r1,
  189. ; return the quantity to add to make n
  190. ; divisible by p. algorithm:
  191.  
  192. ; n+(((p-1)-(n+p-1))&(p-1))
  193.  
  194. function align.n, n, p ; r0=n aligned up to p, r1=amount that was added. p: power of 2
  195.   . r1=p, r1-1, r2=n, r2+r1, r0=r1 ; r1=p-1 (mask), r2=n+p-1, r0=p-1
  196.   . r0-r2, r0&r1, r1=r0, r2=n, r0+r2 ; r0=((p-1)-(n+p-1))&(p-1), copy it to r1, then r0+=n
  197. endf
  198.  
  199. ;;;;;;;;;;;;;;;; SOURCE, DESTINY ;;;;;;;;;;;;;;;;;
  200.  
  201. align ; NOTE(review): no boundary is given here; presumably the macro has a default - confirm
  202.  
  203. void source, destiny ; buffers allocated by create.source/create.destiny
  204. integer origin, omega ; both are zeroed by create.source and by create.destiny
  205.  
  206. function create.source, size ; replace source with a fresh buffer of size bytes, zeroed
  207.   destroy source             ; presumably releases the previous buffer - confirm in SYSTEM.INC
  208.   try source=allocate size   ; NOTE(review): try presumably leaves the function if allocate fails - confirm
  209.   memory.zero source, size   ; dword granularity: the size%4 tail bytes are not zeroed
  210.   . origin=0, omega=0
  211. endf 1                       ; NOTE(review): presumably returns 1 - confirm
  212.  
  213. function create.destiny, size ; replace destiny with a fresh buffer of size bytes, zeroed
  214.   destroy destiny             ; presumably releases the previous buffer - confirm in SYSTEM.INC
  215.   try destiny=allocate size   ; NOTE(review): try presumably leaves the function if allocate fails - confirm
  216.   memory.zero destiny, size   ; dword granularity: the size%4 tail bytes are not zeroed
  217.   . origin=0, omega=0         ; NOTE(review): same reset as create.source; confirm both are meant to clear origin and omega
  218. endf 1                        ; NOTE(review): presumably returns 1 - confirm
  219.  
  220. ;;;;;;;;;;;;;;;;;;; TESTING... ;;;;;;;;;;;;;;;;;;;
  221.  
  222. ; optimized 128-BIT copy/set. ".fast"=CPU
  223. ; specific, but they do not replace the
  224. ; portable algorithms
  225. ; address p must be aligned by 16 (movaps). size n is rounded down to a multiple
  226. ; of 16. v/alue must be 32BIT or use 1/2 macros to expand
  227. function memory.set.fast, p, v, n
  228.   . r0=p, r2=v, r1=n
  229.   and r1, -16           ; whole 16-byte blocks only
  230.   jz .e                 ; n<16: nothing to store. a zero index would never end the loop below
  231.   . r0+r1, neg r1       ; start at end, negate index
  232.   movd xmm0, r2
  233.   pshufd xmm0, xmm0, 0  ; duplicate dwords
  234.   @@:
  235.    movaps [r0+r1], xmm0
  236.    add r1, 16           ; index climbs to 0
  237.   jnz @b
  238.   .e:
  239. endf
  240.  
  241. macro memory.set.2.fast p, v, n { ; fill with a 16BIT value
  242.  . r0=v, r0*00010001h ; copy v into both halves of r0; v must fit in 16 BITs
  243.  memory.set.fast p, r0, n
  244. }
  245.  
  246. macro memory.set.1.fast p, v, n { ; fill with an 8BIT value
  247.  . r0=v, r0*01010101h ; copy v into all 4 bytes of r0; v must fit in 8 BITs
  248.  memory.set.fast p, r0, n
  249. }
  250.  
  251. function memory.copy.fast.a, a, b, n ; copy b to a. a and b must both be aligned by 16 (movaps load and store)
  252.   . r0=a, r2=b, r1=n
  253.   and r1, -16            ; size is rounded down to whole 16-byte blocks
  254.   jz .e                  ; n<16: nothing to copy. a zero index would never end the loop below
  255.   . r0+r1, r2+r1, neg r1 ; start at both ends, negate index
  256.   @@:
  257.    movaps xmm0, [r2+r1]
  258.    movaps [r0+r1], xmm0
  259.    add r1, 16            ; index climbs to 0
  260.   jnz @b
  261.   .e:
  262. endf
  263. function memory.copy.fast, a, b, n ; copy b to a. unaligned addresses are fine (movups)
  264.   . r0=a, r2=b, r1=n
  265.   and r1, -16            ; size is rounded down to whole 16-byte blocks
  266.   jz .e                  ; n<16: nothing to copy. a zero index would never end the loop below
  267.   . r0+r1, r2+r1, neg r1 ; start at both ends, negate index
  268.   @@:
  269.    movups xmm0, [r2+r1]
  270.    movups [r0+r1], xmm0
  271.    add r1, 16            ; index climbs to 0
  272.   jnz @b
  273.   .e:
  274. endf