Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5305 codemaster 1
; $$$$$$$$$$$$$$$$$$$ ABAKIS $$$$$$$$$$$$$$$$$$$$$
2
; *************** STAR^2 SOFTWARE ****************
3
; ????????????????? MEMORY.INC ???????????????????
4
 
5
; allocate n          ; see SYSTEM.INC
6
; allocate.p p, n
7
; destroy p
8
 
9
; memory.set p, v, n  ; 32BIT set/copy/zero
10
; memory.copy a, b, n
11
; memory.zero p, n
12
 
13
; get.bit v, n   ; get/set/zero bit
14
; set.bit v, n
15
; zero.bit v, n
16
; enable.f v, n  ; enable/disable flag
17
; disable.f v, n
18
 
19
; power.2 n      ; is power of 2? which one?
20
; align.n n, p   ; versatile align n to p
21
 
22
;;;;;;;;;;;;; MEMORY COPY, SET, ZERO ;;;;;;;;;;;;;
23
 
24
; portable. 32BIT
25
 
26
; memory.copy a, b, n: copy from b to a in 32BIT
; units. n is shifted right by 2 in place
; (n/4, as in "shr r1, 2 ; n/4" of memory.copy.x),
; so only whole dwords are copied: a remainder
; of n&3 bytes is NOT copied
; NOTE(review): loop/endl are macros defined
; outside this file; presumably they repeat the
; statement n times - confirm behavior for n<4
function memory.copy, a, b, n
27
  alias p=r0, q=r1      ; p=destiny, q=source
28
  . p=a, q=b, n>>>2     ; n=count of dwords
29
  loop n, (u32) *p++=*q++, endl
30
endf
31
 
32
; memory.set a, b, n: store the 32BIT value b
; into memory at a, one dword at a time. n is
; shifted right by 2 in place, so only n/4 whole
; dwords are written: a remainder of n&3 bytes
; is NOT set
; NOTE(review): loop/endl are macros defined
; outside this file; presumably they repeat the
; statement n times - confirm behavior for n<4
function memory.set, a, b, n
33
  alias p=r0, v=r1, x=r2 ; x is not referenced below
34
  . p=a, v=b, n>>>2      ; n=count of dwords
35
  loop n, (u32) *p++=v, endl
36
endf
37
 
38
; memory.zero p, n: expands to memory.set with
; value 0, so it shares its 32BIT granularity
macro memory.zero p, n { memory.set p, 0, n }
39
 
40
; x86 specific. aligned
41
 
42
; memory.copy.x a, b, n: copy n bytes from b to
; a with string instructions: n/4 dwords first
; (rep movsd), then the n&3 remaining bytes
; (rep movsb). nothing is copied if either
; address is 0. r6/r7 are saved and restored;
; r1 is the counter and is changed.
; NOTE(review): assumes the direction flag is
; clear (forward copy) - confirm with callers

function memory.copy.x, a, b, n
  push r6 r7
  . r7=a,\
   r6=b, r1=n
  ; test each address on its own. ANDing them
  ; together (test r7, r6) is also 0 for two
  ; valid addresses with no set BITs in common
  ; (example: 1000h and 2000h), which would
  ; silently skip the copy
  test r7, r7    ; destiny=0?
  jz .e
  test r6, r6    ; source=0?
  jz .e
  cmp r1, 4      ; if n<4
  jb @f          ; bytes only
  push r1
  shr r1, 2      ; n/4
  rep movsd      ; copy dwords
  pop r1
  and r1, 3      ; modulo 4
  jz .e          ; remainder?
  @@:
  rep movsb      ; copy bytes
  .e:
  pop r7 r6
endf
61
 
62
; memory.set.x p, v, n: fill n bytes at p with
; v using string instructions. v is multiplied
; by 01010101h, which repeats a byte value
; (0-255) in all 4 bytes of r0. n/4 dwords are
; stored first, then the n&3 remaining bytes.
; nothing is stored if p is 0. r7 is saved and
; restored; r0/r1 are changed

function memory.set.x, p, v, n
  push r7
  . r7=p
  . r0=v, r0*01010101h  ; v in every byte
  . r1=n
  test r7, r7           ; address=0?
  jz .done
  cmp r1, 4             ; n<4?
  jb .tail              ; bytes only
  push r1
  shr r1, 2             ; n/4
  rep stosd             ; store dwords
  pop r1
  and r1, 3             ; modulo 4
  jz .done              ; no remainder
  .tail:
  rep stosb             ; store bytes
  .done:
  pop r7
endf
82
 
83
;;;;;;;;;;;;;;;; GET/SET/ZERO BIT ;;;;;;;;;;;;;;;;
84
 
85
; 76543210. warning: r0/r1/r2 cannot be used
86
; as parameters. 'v' should be m, 'i' can be m/i
87
 
88
; get.bit v, i: r0=BIT i of v (0 or 1)

macro get.bit v, i {  ; (v>>i)&1
 . r0=v, r1=i, r0>>cl, r0&1
}

; set.bit v, i: set BIT i of v. the shift count
; is taken from cl after r1=i

macro set.bit v, i {  ; v|=(1<<i)
 . r0=1, r1=i, r0<<cl, v|r0
}

; zero.bit v, i: clear BIT i of v by ANDing v
; with the inverted mask

macro zero.bit v, i { ; v&=~(1<<i)
 . r0=1, r1=i, r0<<cl, not r0, v&r0
}
99
 
100
; 1111.0000
101
 
102
; get.nibble v, i: r0=4BIT field number i of v.
; i is multiplied by 4 (r1<<2) to get the
; shift count in cl

macro get.nibble v, i { ; (v>>(i*4))&1111b
 . r0=v, r1=i, r1<<2, r0>>cl, r0&1111b
}

; set.nibble v, i, n: OR n into 4BIT field i of
; v. the previous contents of the field are not
; cleared first. uses r0/r1/r2

macro set.nibble v, i, n { ; v|=(n<<(i*4))
 . r0=v, r1=i, r2=n, r1<<2, r2<<cl,\
 r0|r2, v=r0
}
110
 
111
; 33.22.11.00
112
 
113
; get.couple v, i: r0=2BIT field number i of v.
; i is multiplied by 2 (r1<<1) to get the
; shift count in cl

macro get.couple v, i { ; (v>>(i*2))&11b
 . r0=v, r1=i, r1<<1, r0>>cl, r0&11b
}

; set.couple v, i, n: OR n into 2BIT field i of
; v. the previous contents of the field are not
; cleared first. uses r0/r1/r2

macro set.couple v, i, n { ; v|=(n<<(i*2))
 . r0=v, r1=i, r2=n, r1<<1, r2<<cl,\
 r0|r2, v=r0
}
121
 
122
; enable/disable flag
123
 
124
; enable.f v, n: OR the mask n into v. uses r0
macro enable.f v, n { . r0=n, v|r0 }
125
 
126
; disable.f v, n: AND v with the inverted mask
; n, clearing every BIT that is set in n. uses r0
macro disable.f v, n
127
 { . r0=n, not r0, v&r0 }
128
 
129
; toggle n: XOR n with 1, flipping only BIT 0
macro toggle n { xor n, 1 } ; invert 1/0
130
 
131
; create AA.BBB.CCCb/AA.BB.CC.DDb BIT structures
132
 
133
; triplet a, b, c: build an AA.BBB.CCCb value.
; leaves (a<<6)|(b<<3)|c in r0. uses r1. the
; fields are not masked, so each must already
; fit its width

function triplet, a, b, c
  . r0=a, r0<<6     ; AA starts at BIT 6
  . r1=b, r1<<3     ; BBB starts at BIT 3
  . r0|r1
  . r0|c            ; CCC starts at BIT 0
endf
136
 
137
; quadruplet a, b, c, d: build an AA.BB.CC.DDb
; value. leaves (a<<6)|(b<<4)|(c<<2)|d in r0.
; uses r1/r2. the fields are not masked, so
; each must already fit in 2 BITs

function quadruplet, a, b, c, d
  . r0=a, r0<<6     ; AA starts at BIT 6
  . r1=b, r1<<4     ; BB starts at BIT 4
  . r2=c, r2<<2     ; CC starts at BIT 2
  . r0|r1
  . r0|r2
  . r0|d            ; DD starts at BIT 0
endf
141
 
142
; reverse byte order
143
 
144
; reverse.32 n: r0=n with its 4 bytes in
; reverse order (bswap)
macro reverse.32 n
145
 { . r0=n, bswap r0 }
146
 
147
; reverse.24 n: r0=low 3 bytes of n in reverse
; order. bswap moves them to the top 3 bytes
; and the shift by 8 brings them back down
macro reverse.24 n
148
 { . r0=n, bswap r0, r0>>>8 }
149
 
150
; reverse.16 n: swap al and ah through cl. cl
; is overwritten and the upper 16 BITs of r0
; keep whatever n had there
macro reverse.16 n
151
 { . r0=n, cl=al, al=ah, ah=cl }
152
 
153
;;;;;;;;;;;;;;;;;; POWERS OF 2 ;;;;;;;;;;;;;;;;;;;
154
 
155
; an unsigned number is a power of 2 if only
156
; 1 BIT is set: if !(n&n-1). subtracting 1
157
; clears that BIT and sets all lower BITs. if n=10000000b (80h/128),
158
; n&01111111b=0
159
 
160
; to find out which power of 2, search n-1
161
; for 1st 0 BIT from right to left
162
 
163
; is n power of 2? example: power.2 128
164
; returns 7
165
 
166
; power.2 n: if n is a power of 2 (and n>=2),
; leave its exponent in r0 (example: 128
; returns 7). otherwise leave 0 in r0, which
; includes n=0 and n=1. uses r1 and changes
; the parameter n in place

function power.2, n
  locals i
  . r0=n
  if r0<2, go .r0, end   ; 0 and 1: return 0
  . r1=r0, r1-1, r0&r1   ; n&(n-1)
  test r0, r0
  jnz .r0                ; >1 BIT set: return 0
  . n--, i=1             ; n-1=all lower BITs set
  @@:
   ; test BIT i of n-1, then advance i. the
   ; increment is required or the loop would
   ; test the same BIT forever
   . r0=1, r1=i, r0<<cl, i++
   test n, r0
  jnz @b                 ; until 1st 0 BIT
  . r0=i, r0--           ; i is 1 past that BIT
  jmp @f
  .r0: . r0=0
  @@:
endf
183
 
184
;;;;;;;;;;;;;;;;;;;;; ALIGN ;;;;;;;;;;;;;;;;;;;;;;
185
 
186
; versatile align n/umber by power of 2
187
 
188
; return n aligned to p in r0. in r1,
189
; return the quantity to add to make n
190
; divisible by p. algorithm:
191
 
192
; n+(((p-1)-(n+p-1))&(p-1))
193
 
194
; align.n n, p: leaves n rounded up to a
; multiple of p in r0 and the amount that was
; added in r1. p is used as a mask (p-1), which
; matches the "power of 2" note above. uses r2

function align.n, n, p
  . r1=p, r1-1      ; r1=p-1 (mask)
  . r2=n, r2+r1     ; r2=n+p-1
  . r0=r1, r0-r2    ; r0=(p-1)-(n+p-1)
  . r0&r1           ; r0=quantity to add
  . r1=r0           ; keep it in r1
  . r2=n, r0+r2     ; r0=n+quantity
endf
198
 
199
;;;;;;;;;;;;;;;; SOURCE, DESTINY ;;;;;;;;;;;;;;;;;
200
 
201
; NOTE(review): align/void/integer are macros
; defined outside this file; presumably they
; align and declare the variables - confirm
align
202
 
203
; addresses assigned from allocate by
; create.source/create.destiny
void source, destiny
204
; both are reset to 0 by create.source and
; create.destiny
integer origin, omega
205
 
206
; create.source size: destroy the previous
; source, allocate size bytes for a new one,
; zero it with memory.zero (32BIT units) and
; reset origin/omega to 0
; NOTE(review): try/allocate/destroy are
; defined outside this file; presumably try
; exits the function if allocate fails and
; "endf 1" returns 1 on success - confirm
function create.source, size
207
  destroy source
208
  try source=allocate size
209
  memory.zero source, size
210
  . origin=0, omega=0
211
endf 1
212
 
213
; create.destiny size: destroy the previous
; destiny, allocate size bytes for a new one,
; zero it with memory.zero (32BIT units) and
; reset origin/omega to 0. note that origin
; and omega are the same variables that
; create.source resets
; NOTE(review): try/allocate/destroy are
; defined outside this file; presumably try
; exits the function if allocate fails and
; "endf 1" returns 1 on success - confirm
function create.destiny, size
214
  destroy destiny
215
  try destiny=allocate size
216
  memory.zero destiny, size
217
  . origin=0, omega=0
218
endf 1
219
 
220
;;;;;;;;;;;;;;;;;;; TESTING... ;;;;;;;;;;;;;;;;;;;
221
 
222
; optimized 128-BIT copy/set. ".fast"=CPU
223
; specific, but they do not replace the
224
; portable algorithms
225
 
226
; address p must be aligned by 16 (movaps) and
227
; size n must be divisible by 16. v/alue must
228
; be 32BIT or use 1/2 macros to expand
229
 
230
; memory.set.fast p, v, n: fill n bytes at p
; with the 32BIT value v, 16 bytes per store.
; p must be aligned by 16 (movaps) and n must
; be divisible by 16. n=0 stores nothing.
; uses r0/r1/r2 and xmm0

function memory.set.fast, p, v, n
  . r0=p, r2=v, r1=n
  ; without this check, n=0 would enter the
  ; loop with index 0, store past the end and
  ; keep going until the index wraps back to 0
  test r1, r1           ; n=0?
  jz .e
  . r0+r1, neg r1       ; start at end, negate index
  movd xmm0, r2
  pshufd xmm0, xmm0, 0  ; duplicate dwords
  @@:
   movaps [r0+r1], xmm0
   add r1, 16           ; index reaches 0 at end
  jnz @b
  .e:
endf
240
 
241
; memory.set.2.fast p, v, n: multiply v by
; 00010001h, which repeats a 16BIT value in
; both halves of r0, then call memory.set.fast
; with r0 as the value
macro memory.set.2.fast p, v, n {
242
 . r0=v, r0*00010001h
243
 memory.set.fast p, r0, n
244
}
245
 
246
; memory.set.1.fast p, v, n: multiply v by
; 01010101h, which repeats a byte value in all
; 4 bytes of r0, then call memory.set.fast
; with r0 as the value
macro memory.set.1.fast p, v, n {
247
 . r0=v, r0*01010101h
248
 memory.set.fast p, r0, n
249
}
250
 
251
; both a/ddresses must be aligned by 16 (movaps) and
252
; size n must be divisible by 16
253
 
254
; memory.copy.fast.a a, b, n: copy n bytes from
; b to a, 16 bytes at a time. movaps is used
; for the load and the store, so both a and b
; must be aligned by 16. n must be divisible
; by 16. n=0 copies nothing. uses r0/r1/r2
; and xmm0

function memory.copy.fast.a, a, b, n
  . r0=a, r2=b, r1=n
  ; without this check, n=0 would enter the
  ; loop with index 0, copy past the end and
  ; keep going until the index wraps back to 0
  test r1, r1           ; n=0?
  jz .e
  . r0+r1, r2+r1, neg r1 ; start at end, negate index
  @@:
   movaps xmm0, [r2+r1]
   movaps [r0+r1], xmm0
   add r1, 16           ; index reaches 0 at end
  jnz @b
  .e:
endf
263
 
264
; unaligned...
265
 
266
; memory.copy.fast a, b, n: copy n bytes from b
; to a, 16 bytes at a time. movups is used for
; the load and the store, so neither address
; needs alignment. n must be divisible by 16.
; n=0 copies nothing. uses r0/r1/r2 and xmm0

function memory.copy.fast, a, b, n
  . r0=a, r2=b, r1=n
  ; without this check, n=0 would enter the
  ; loop with index 0, copy past the end and
  ; keep going until the index wraps back to 0
  test r1, r1           ; n=0?
  jz .e
  . r0+r1, r2+r1, neg r1 ; start at end, negate index
  @@:
   movups xmm0, [r2+r1]
   movups [r0+r1], xmm0
   add r1, 16           ; index reaches 0 at end
  jnz @b
  .e:
endf