Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5305 | codemaster | 1 | ; $$$$$$$$$$$$$$$$$$$ ABAKIS $$$$$$$$$$$$$$$$$$$$$ |
2 | ; *************** STAR^2 SOFTWARE **************** |
||
3 | ; ????????????????? MEMORY.INC ??????????????????? |
||
4 | |||
5 | ; allocate n ; see SYSTEM.INC |
||
6 | ; allocate.p p, n |
||
7 | ; destroy p |
||
8 | |||
9 | ; memory.set p, v, n ; 32BIT set/copy/zero |
||
10 | ; memory.copy a, b, n |
||
11 | ; memory.zero p, n |
||
12 | |||
13 | ; get.bit v, n ; get/set/zero bit |
||
14 | ; set.bit v, n |
||
15 | ; zero.bit v, n |
||
16 | ; enable.f v, n ; enable/disable flag |
||
17 | ; disable.f v, n |
||
18 | |||
19 | ; power.2 n ; is power of 2? which one? |
||
20 | ; align.n n, p ; versatile align n to p |
||
21 | |||
22 | ;;;;;;;;;;;;; MEMORY COPY, SET, ZERO ;;;;;;;;;;;;; |
||
23 | |||
24 | ; portable. 32BIT |
||
25 | |||
; memory.copy a, b, n
; portable 32BIT copy from address b to address a. n is a size in
; bytes: it is first turned into a u32 count (n>>>2), then one u32 is
; copied per iteration while both pointers advance. a size that is
; not a multiple of 4 leaves its last n&3 bytes uncopied.
; uses r0 (destination pointer) and r1 (source pointer).
; NOTE(review): loop/endl are defined outside this file; presumably
; they repeat the statement n times - confirm.

function memory.copy, a, b, n
  alias p=r0, q=r1
  . p=a                           ; p = destination
  . q=b                           ; q = source
  . n>>>2                         ; bytes -> u32 count
  loop n, (u32) *p++=*q++, endl
endf
||
31 | |||
; memory.set a, b, n
; portable 32BIT fill: store the 32BIT value b into n/4 consecutive
; u32 starting at address a. n is a size in bytes; the last n&3
; bytes of a size that is not a multiple of 4 are left untouched.
; uses r0 (pointer) and r1 (value). alias x=r2 is declared but is
; not referenced in this function.
; NOTE(review): loop/endl are defined outside this file; presumably
; they repeat the statement n times - confirm.

function memory.set, a, b, n
  alias p=r0, v=r1, x=r2
  . p=a                           ; p = destination
  . v=b                           ; v = 32BIT fill value
  . n>>>2                         ; bytes -> u32 count
  loop n, (u32) *p++=v, endl
endf
||
37 | |||
; memory.zero p, n
; expands to memory.set with a fill value of 0

macro memory.zero p, n {
  memory.set p, 0, n
}
||
39 | |||
40 | ; x86 specific. aligned |
||
41 | |||
; memory.copy.x a, b, n
; x86 string copy of n bytes from address b to address a: n/4
; dwords with rep movsd, then the remaining n&3 bytes with rep
; movsb. nothing is copied if either address is 0.
; r6/r7 are preserved; r1 is the counter and is not restored.
; NOTE(review): the copy direction depends on the CPU direction
; flag, which is not touched here - presumably clear on entry.

function memory.copy.x, a, b, n
  push r6 r7
  . r7=a,\
    r6=b, r1=n
  test r7, r7         ; destination=0?
  jz .e
  test r6, r6         ; source=0? (each pointer is checked on its
  jz .e               ; own: a&b can be 0 for two valid addresses)
  cmp r1, 4           ; if n<4, bytes only
  jb @f
  push r1             ; keep n for the remainder
  shr r1, 2           ; n/4
  rep movsd           ; copy dwords
  pop r1
  and r1, 3           ; modulo 4
  jz .e               ; no remainder?
  @@:
  rep movsb           ; copy bytes
  .e:
  pop r7 r6
endf
||
61 | |||
; memory.set.x p, v, n
; x86 string fill of n bytes at address p with the byte v: n/4
; dwords with rep stosd, then the remaining n&3 bytes with rep
; stosb. nothing is stored if p is 0.
; v is reduced to its low byte before it is replicated, so the
; dword pattern always matches what the byte tail (al) stores.
; r7 is preserved; r0 and r1 are used and not restored.
; NOTE(review): the store direction depends on the CPU direction
; flag, which is not touched here - presumably clear on entry.

function memory.set.x, p, v, n
  push r7
  . r7=p, r0=v, r0&0FFh,\
    r0*01010101h,\
    r1=n
  test r7, r7         ; address=0?
  jz .e
  cmp r1, 4           ; n<4? bytes only
  jb @f
  push r1             ; keep n for the remainder
  shr r1, 2           ; n/4
  rep stosd           ; store dwords
  pop r1
  and r1, 3           ; modulo 4
  jz .e               ; no remainder?
  @@:
  rep stosb           ; store bytes
  .e:
  pop r7
endf
||
82 | |||
83 | ;;;;;;;;;;;;;;;; GET/SET/ZERO BIT ;;;;;;;;;;;;;;;; |
||
84 | |||
; 76543210. warning: r0/r1/r2 cannot be used
; as parameters. 'v' should be m, 'i' can be m/i

; get.bit v, i
; r0=(v>>i)&1. the index is loaded into r1 so that the shift
; count is taken from cl

macro get.bit v, i {
  . r0=v, r1=i        ; value, BIT index
  . r0>>cl, r0&1      ; move BIT i to BIT 0, isolate it
}
||
91 | |||
; set.bit v, i
; v|=(1<<i). builds the mask in r0 (shift count in cl via r1) and
; ORs it into v. r0/r1 cannot be used as parameters

macro set.bit v, i {
  . r0=1, r1=i, r0<<cl, v|r0
}
||
95 | |||
; zero.bit v, i
; v&=~(1<<i). builds the mask in r0 (shift count in cl via r1),
; inverts it and ANDs it into v. r0/r1 cannot be used as parameters

macro zero.bit v, i {
  . r0=1, r1=i, r0<<cl, not r0, v&r0
}
||
99 | |||
; 1111.0000

; get.nibble v, i
; r0=(v>>(i*4))&1111b. r1 holds i*4 so that the shift count is
; taken from cl

macro get.nibble v, i {
  . r0=v, r1=i, r1<<2     ; value, index*4
  . r0>>cl, r0&1111b      ; move nibble i down, isolate it
}
||
105 | |||
; set.nibble v, i, n
; v|=(n<<(i*4)). n is shifted into place in r2 (count in cl via
; r1) and ORed into v; the nibble already in v is not cleared
; first. r0/r1/r2 cannot be used as parameters

macro set.nibble v, i, n {
  . r0=v, r1=i, r2=n, r1<<2, r2<<cl,\
    r0|r2, v=r0
}
||
110 | |||
; 33.22.11.00

; get.couple v, i
; r0=(v>>(i*2))&11b. r1 holds i*2 so that the shift count is
; taken from cl

macro get.couple v, i {
  . r0=v, r1=i, r1<<1     ; value, index*2
  . r0>>cl, r0&11b        ; move couple i down, isolate it
}
||
116 | |||
; set.couple v, i, n
; v|=(n<<(i*2)). n is shifted into place in r2 (count in cl via
; r1) and ORed into v; the 2 BITs already in v are not cleared
; first. r0/r1/r2 cannot be used as parameters

macro set.couple v, i, n {
  . r0=v, r1=i, r2=n, r1<<1, r2<<cl,\
    r0|r2, v=r0
}
||
121 | |||
; enable/disable flag

; enable.f v, n: v|=n. n goes through r0

macro enable.f v, n {
  . r0=n, v|r0
}

; disable.f v, n: v&=~n. n goes through r0

macro disable.f v, n {
  . r0=n, not r0, v&r0
}

; toggle n: invert 1/0 by flipping BIT 0 of n

macro toggle n {
  xor n, 1
}
||
130 | |||
; create AA.BBB.CCCb/AA.BB.CC.DDb BIT structures

; triplet a, b, c
; r0=(a<<6)|(b<<3)|c. the fields are not masked, so each value
; is expected to already fit in its field. uses r1

function triplet, a, b, c
  . r0=a, r0<<6       ; AA.000.000
  . r1=b, r1<<3       ; 00.BBB.000
  . r0|r1, r0|c       ; merge, then CCC
endf
||
136 | |||
; quadruplet a, b, c, d
; r0=(a<<6)|(b<<4)|(c<<2)|d. the fields are not masked, so each
; value is expected to already fit in its 2 BITs. uses r1, r2

function quadruplet, a, b, c, d
  . r0=a, r0<<6       ; AA.00.00.00
  . r1=b, r1<<4       ; 00.BB.00.00
  . r2=c, r2<<2       ; 00.00.CC.00
  . r0|r1, r0|r2      ; merge
  . r0|d              ; 00.00.00.DD
endf
||
141 | |||
; reverse byte order. result in r0

; reverse.32 n: swap all 4 bytes

macro reverse.32 n {
  . r0=n, bswap r0
}

; reverse.24 n: swap all 4 bytes, then shift the
; result down by one byte

macro reverse.24 n {
  . r0=n, bswap r0, r0>>>8
}

; reverse.16 n: exchange al and ah through cl
; (r1 is modified)

macro reverse.16 n {
  . r0=n, cl=al, al=ah, ah=cl
}
||
152 | |||
153 | ;;;;;;;;;;;;;;;;;; POWERS OF 2 ;;;;;;;;;;;;;;;;;;; |
||
154 | |||
; an unsigned number is a power of 2 if only
; 1 BIT is set: if !(n&n-1). subtracting 1
; clears that BIT and sets every BIT below it.
; if n=10000000b (80h/128), n-1=01111111b and
; n&01111111b=0

; to find out which power of 2, search n-1
; for 1st 0 BIT from right to left

; is n power of 2? example: power.2 128
; returns 7. returns 0 if n<2 or if n is not
; a power of 2. uses r0, r1; n is decremented

function power.2, n
  locals i
  . r0=n
  if r0<2, go .r0, end
  . r1=r0, r1-1, r0&r1        ; n&(n-1)
  test r0, r0
  jnz .r0                     ; more than 1 BIT set
  . n--, i=1                  ; n-1 = all BITs below the power
  @@:
  . r0=1, r1=i, r0<<cl, i++   ; r0=1<<i, then next index
  test n, r0
  jnz @b                      ; BIT still set? keep searching
  . r0=i, r0--                ; undo the last increment
  jmp @f
  .r0: . r0=0
  @@:
endf
||
183 | |||
184 | ;;;;;;;;;;;;;;;;;;;;; ALIGN ;;;;;;;;;;;;;;;;;;;;;; |
||
185 | |||
; versatile align n/umber by power of 2

; return n aligned to p in r0. in r1,
; return the quantity to add to make n
; divisible by p. r2 is left holding n.
; algorithm:

; n+((-n)&(p-1))

; which is the same value as
; n+(((p-1)-(n+p-1))&(p-1))

function align.n, n, p
  . r1=p, r1-1                ; mask = p-1
  . r0=n, neg r0, r0&r1       ; (-n)&mask = quantity to add
  . r1=r0                     ; return it in r1
  . r2=n, r0+r2               ; aligned n in r0
endf
||
198 | |||
199 | ;;;;;;;;;;;;;;;; SOURCE, DESTINY ;;;;;;;;;;;;;;;;; |
||
200 | |||
; shared state of create.source/create.destiny:
; source, destiny receive the result of allocate;
; origin, omega are reset to 0 by both functions.
; NOTE(review): what origin/omega index is not
; visible in this file - confirm against users

align

void source, destiny
integer origin, omega
||
205 | |||
; create.source size
; destroy the current source, allocate a new one of size bytes,
; zero it with memory.zero and reset origin/omega. returns 1.
; NOTE(review): try/allocate/destroy are defined elsewhere (see
; SYSTEM.INC); presumably try leaves the function early when
; the allocation fails - confirm

function create.source, size
  destroy source              ; release the previous buffer
  try source=allocate size
  memory.zero source, size
  . origin=0
  . omega=0
endf 1
||
212 | |||
; create.destiny size
; destroy the current destiny, allocate a new one of size bytes,
; zero it with memory.zero and reset origin/omega. returns 1.
; NOTE(review): try/allocate/destroy are defined elsewhere (see
; SYSTEM.INC); presumably try leaves the function early when
; the allocation fails - confirm

function create.destiny, size
  destroy destiny             ; release the previous buffer
  try destiny=allocate size
  memory.zero destiny, size
  . origin=0
  . omega=0
endf 1
||
219 | |||
220 | ;;;;;;;;;;;;;;;;;;; TESTING... ;;;;;;;;;;;;;;;;;;; |
||
221 | |||
; optimized 128-BIT copy/set. ".fast"=CPU
; specific, but they do not replace the
; portable algorithms

; memory.set.fast p, v, n
; address p must be aligned by 16 (movaps) and
; size n must be divisible by 16. v/alue must
; be 32BIT or use 1/2 macros to expand.
; n=0 stores nothing. uses r0, r1, r2, xmm0

function memory.set.fast, p, v, n
  . r0=p, r2=v, r1=n
  test r1, r1             ; n=0? the index would never
  jz .e                   ; return to 0 from +16
  . r0+r1, neg r1         ; start at end, negate index
  movd xmm0, r2
  pshufd xmm0, xmm0, 0    ; duplicate dwords
  @@:
  movaps [r0+r1], xmm0    ; store 16 bytes
  add r1, 16              ; index climbs from -n to 0
  jnz @b
  .e:
endf
||
240 | |||
; memory.set.2.fast p, v, n
; expand the 16BIT v/alue to 32BIT (v*00010001h) in r0, then call
; memory.set.fast. r0 is overwritten before p and n are passed on,
; so they cannot be r0

macro memory.set.2.fast p, v, n {
  . r0=v
  . r0*00010001h
  memory.set.fast p, r0, n
}

; memory.set.1.fast p, v, n
; expand the 8BIT v/alue to 32BIT (v*01010101h) in r0, then call
; memory.set.fast. r0 is overwritten before p and n are passed on,
; so they cannot be r0

macro memory.set.1.fast p, v, n {
  . r0=v
  . r0*01010101h
  memory.set.fast p, r0, n
}
||
250 | |||
; memory.copy.fast.a a, b, n
; copy n bytes from b to a, 16 at a time.
; destiny a/ddress must be aligned by 16 and
; size n must be divisible by 16. the source
; is loaded with movups, so b may be unaligned.
; n=0 copies nothing. uses r0, r1, r2, xmm0

function memory.copy.fast.a, a, b, n
  . r0=a, r2=b, r1=n
  test r1, r1             ; n=0? the index would never
  jz .e                   ; return to 0 from +16
  . r0+r1, r2+r1, neg r1  ; start at end, negate index
  @@:
  movups xmm0, [r2+r1]    ; unaligned load
  movaps [r0+r1], xmm0    ; aligned store
  add r1, 16              ; index climbs from -n to 0
  jnz @b
  .e:
endf
||
263 | |||
; unaligned...

; memory.copy.fast a, b, n
; copy n bytes from b to a, 16 at a time, with
; no alignment requirement on either address
; (movups). size n must be divisible by 16.
; n=0 copies nothing. uses r0, r1, r2, xmm0

function memory.copy.fast, a, b, n
  . r0=a, r2=b, r1=n
  test r1, r1             ; n=0? the index would never
  jz .e                   ; return to 0 from +16
  . r0+r1, r2+r1, neg r1  ; start at end, negate index
  @@:
  movups xmm0, [r2+r1]
  movups [r0+r1], xmm0
  add r1, 16              ; index climbs from -n to 0
  jnz @b
  .e:
endf