Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5098 | clevermous | 1 | #include "MatchFinder.h" |
2 | /* memcpy must be inlined - we do not want to use RTL */ |
||
3 | #include |
||
4 | /*#pragma function(memcpy) |
||
5 | void* __cdecl memcpy(void* _Dst, const void* _Src, size_t _Size) |
||
6 | { |
||
7 | unsigned long i; |
||
8 | for (i = 0; i < _Size; i++) |
||
9 | ((char*)_Dst)[i] = ((char*)_Src)[i]; |
||
10 | return _Dst; |
||
11 | }*/ |
||
12 | //#pragma intrinsic(memcpy) |
||
13 | |||
14 | #define kMaxValForNormalize (((unsigned)1<<31)-1) |
||
15 | |||
16 | /* settings for bt4: |
||
17 | defined HASH_ARRAY_2 |
||
18 | defined HASH_ARRAY_3 |
||
19 | */ |
||
20 | |||
21 | //#define kHash2Size 0x400 |
||
22 | #define kNumHashDirectBytes 0 |
||
23 | #define kNumHashBytes 3 |
||
24 | //#define kHash3Size 0x40000 |
||
25 | #define kHash2Size 0x10000 |
||
26 | #define kHashSize 0x100000 |
||
27 | |||
28 | #define kHashSizeSum (kHashSize+kHash2Size) |
||
29 | #define kHash2Offset kHashSize |
||
30 | |||
31 | static unsigned _cyclicBufferPos; |
||
32 | static unsigned _cyclicBufferSize; |
||
33 | static unsigned _matchMaxLen; |
||
34 | static unsigned* _hash; |
||
35 | static unsigned _cutValue; |
||
36 | |||
37 | #ifdef GENERIC_INPUT |
||
38 | static byte* _bufferBase; |
||
39 | static unsigned _posLimit; |
||
40 | static bool _streamEndWasReached; |
||
41 | static byte* _pointerToLastSafePosition; |
||
42 | static byte* _buffer; |
||
43 | static unsigned _blockSize; |
||
44 | static unsigned _pos; |
||
45 | static unsigned _keepSizeBefore; |
||
46 | static unsigned _keepSizeAfter; |
||
47 | static unsigned _keepSizeReserv; |
||
48 | static unsigned _streamPos; |
||
49 | #else |
||
50 | #define _buffer curin |
||
51 | #define _pos pack_pos |
||
52 | #define _streamPos pack_length |
||
53 | #endif |
||
54 | |||
55 | #ifdef GENERIC_INPUT |
||
56 | /* LZ Window */ |
||
57 | |||
58 | static void LZInWindow_Create(unsigned keepSizeBefore,unsigned keepSizeAfter,unsigned keepSizeReserv,byte**mem) |
||
59 | { |
||
60 | _keepSizeBefore = keepSizeBefore; |
||
61 | _keepSizeAfter = keepSizeAfter; |
||
62 | _keepSizeReserv = keepSizeReserv; |
||
63 | _blockSize = keepSizeBefore + keepSizeAfter + keepSizeReserv; |
||
64 | _bufferBase = *mem; |
||
65 | _blockSize = (_blockSize + 3) & ~3; |
||
66 | *mem += _blockSize; |
||
67 | _pointerToLastSafePosition = _bufferBase + _blockSize - keepSizeAfter; |
||
68 | } |
||
69 | |||
70 | static void ReadBlock(void) |
||
71 | { |
||
72 | if (_streamEndWasReached) |
||
73 | return; |
||
74 | for (;;) |
||
75 | { |
||
76 | unsigned size; |
||
77 | size = (unsigned)(_bufferBase-_buffer) + _blockSize - _streamPos; |
||
78 | if (!size) return; |
||
79 | if (size > pack_length - pack_pos) |
||
80 | size = pack_length - pack_pos; |
||
81 | memcpy(_buffer+_streamPos,curin,size); |
||
82 | curin += size; |
||
83 | pack_pos += size; |
||
84 | if (size == 0) |
||
85 | { |
||
86 | byte* pointerToPosition; |
||
87 | _posLimit = _streamPos; |
||
88 | pointerToPosition = _buffer + _posLimit; |
||
89 | if (pointerToPosition > _pointerToLastSafePosition) |
||
90 | _posLimit = _pointerToLastSafePosition - _buffer; |
||
91 | _streamEndWasReached = true; |
||
92 | return; |
||
93 | } |
||
94 | _streamPos += size; |
||
95 | if (_streamPos >= _pos + _keepSizeAfter) |
||
96 | { |
||
97 | _posLimit = _streamPos - _keepSizeAfter; |
||
98 | return; |
||
99 | } |
||
100 | } |
||
101 | } |
||
102 | |||
103 | static void LZInWindow_Init(void) |
||
104 | { |
||
105 | _buffer = _bufferBase; |
||
106 | _pos = 0; |
||
107 | _streamPos = 0; |
||
108 | _streamEndWasReached = false; |
||
109 | ReadBlock(); |
||
110 | } |
||
111 | #else |
||
112 | #define LZInWindow_Create(a,b,c,d) /* nothing */ |
||
113 | #define LZInWindow_Init() _buffer--, _pos++, _streamPos++ |
||
114 | #endif |
||
115 | |||
116 | const byte* GetPointerToCurrentPos(void) {return _buffer+_pos;} |
||
117 | |||
118 | #ifdef GENERIC_INPUT |
||
119 | static void MoveBlock(void) |
||
120 | { |
||
121 | unsigned offset,numBytes; |
||
122 | offset = _buffer-_bufferBase+_pos-_keepSizeBefore; |
||
123 | numBytes = _buffer-_bufferBase+_streamPos-offset; |
||
124 | // copying backwards: safe to use memcpy instead of memmove |
||
125 | memcpy(_bufferBase,_bufferBase+offset,numBytes); |
||
126 | _buffer -= offset; |
||
127 | } |
||
128 | |||
129 | static void LZInWindow_MovePos(void) |
||
130 | { |
||
131 | _pos++; |
||
132 | if (_pos > _posLimit) |
||
133 | { |
||
134 | const byte* pointerToPosition = _buffer+_pos; |
||
135 | if (pointerToPosition > _pointerToLastSafePosition) |
||
136 | MoveBlock(); |
||
137 | ReadBlock(); |
||
138 | } |
||
139 | } |
||
140 | #else |
||
141 | #define LZInWindow_MovePos() _pos++ |
||
142 | #endif |
||
143 | |||
144 | byte GetIndexByte(int index) {return _buffer[_pos+index];} |
||
145 | |||
146 | unsigned GetMatchLen(int index,unsigned distance,unsigned limit) |
||
147 | { |
||
148 | const byte* pby; |
||
149 | unsigned i; |
||
150 | #ifdef GENERIC_INPUT |
||
151 | if (_streamEndWasReached) |
||
152 | if ((_pos+index)+limit > _streamPos) |
||
153 | limit = _streamPos - (_pos+index); |
||
154 | #else |
||
155 | unsigned limit2 = pack_length - (pack_pos + index); |
||
156 | if (limit > limit2) |
||
157 | limit = limit2; |
||
158 | #endif |
||
159 | distance++; |
||
160 | pby = _buffer + _pos + index; |
||
161 | for (i=0;i |
||
162 | return i; |
||
163 | } |
||
164 | |||
165 | unsigned GetNumAvailableBytes(void) {return _streamPos-_pos;} |
||
166 | |||
167 | #ifdef GENERIC_INPUT |
||
168 | void ReduceOffsets(int subValue) |
||
169 | { |
||
170 | _buffer += subValue; |
||
171 | _posLimit -= subValue; |
||
172 | _pos -= subValue; |
||
173 | _streamPos -= subValue; |
||
174 | } |
||
175 | #else |
||
176 | #define ReduceOffsets(a) /* nothing */ |
||
177 | #endif |
||
178 | |||
179 | /* Binary tree Match Finder */ |
||
180 | |||
181 | static unsigned crc_table[256]; |
||
182 | |||
183 | void MatchFinder_Create(unsigned historySize,unsigned keepAddBufferBefore,unsigned matchMaxLen,unsigned keepAddBufferAfter,byte**mem) |
||
184 | { |
||
185 | unsigned sizeReserv; |
||
186 | sizeReserv = (historySize + keepAddBufferBefore + matchMaxLen + keepAddBufferAfter)/2+256; |
||
187 | LZInWindow_Create(historySize+keepAddBufferBefore,matchMaxLen+keepAddBufferAfter,sizeReserv,mem); |
||
188 | _matchMaxLen = matchMaxLen; |
||
189 | _cyclicBufferSize = historySize+1; |
||
190 | _hash = (unsigned*)*mem; |
||
191 | *mem += (kHashSizeSum + _cyclicBufferSize*2) * sizeof(unsigned); |
||
192 | _cutValue = 0xFF; |
||
193 | } |
||
194 | |||
195 | void MatchFinder_Init(void) |
||
196 | { |
||
197 | unsigned i,j,r; |
||
198 | LZInWindow_Init(); |
||
199 | for (i=0;i |
||
200 | _hash[i] = 0; |
||
201 | _cyclicBufferPos = 0; |
||
202 | ReduceOffsets(-1); |
||
203 | for (i=0;i<256;i++) |
||
204 | { |
||
205 | r = i; |
||
206 | for (j=0;j<8;j++) |
||
207 | { |
||
208 | if (r & 1) |
||
209 | r = (r>>1) ^ 0xEDB88320; |
||
210 | else |
||
211 | r >>= 1; |
||
212 | } |
||
213 | crc_table[i] = r; |
||
214 | } |
||
215 | } |
||
216 | |||
217 | static unsigned Hash(const byte* ptr, unsigned* hash2Value) |
||
218 | { |
||
219 | unsigned temp; |
||
220 | temp = crc_table[ptr[0]] ^ ptr[1]; |
||
221 | *hash2Value = *(word*)ptr; //ptr[0] + ((unsigned)ptr[1] << 8); |
||
222 | return (temp ^ ((unsigned)ptr[2]<<8)) & (kHashSize - 1); |
||
223 | } |
||
224 | |||
225 | unsigned GetLongestMatch(unsigned* distances) |
||
226 | { |
||
227 | unsigned lenLimit,maxLen=0; |
||
228 | unsigned matchMinPos; |
||
229 | const byte* cur; |
||
230 | unsigned hash2Value,hashValue; |
||
231 | unsigned curMatch,curMatch2; |
||
232 | unsigned *son,*ptr0,*ptr1; |
||
233 | unsigned len0,len1,count; |
||
234 | if (_pos + _matchMaxLen <= _streamPos) |
||
235 | lenLimit = _matchMaxLen; |
||
236 | else |
||
237 | { |
||
238 | lenLimit = _streamPos - _pos; |
||
239 | if (lenLimit < kNumHashBytes) |
||
240 | return 0; |
||
241 | } |
||
242 | matchMinPos = (_pos>_cyclicBufferSize) ? (_pos-_cyclicBufferSize) : 0; |
||
243 | cur = _buffer+_pos; |
||
244 | hashValue = Hash(cur,&hash2Value); |
||
245 | curMatch = _hash[hashValue]; |
||
246 | curMatch2 = _hash[kHash2Offset + hash2Value]; |
||
247 | _hash[kHash2Offset + hash2Value] = _pos; |
||
248 | distances[2] = 0xFFFFFFFF; |
||
249 | if (curMatch2 > matchMinPos) |
||
250 | //if (_buffer[curMatch2] == cur[0]) |
||
251 | { |
||
252 | distances[2] = _pos - curMatch2 - 1; |
||
253 | maxLen = 2; |
||
254 | } |
||
255 | _hash[hashValue] = _pos; |
||
256 | son = _hash + kHashSizeSum; |
||
257 | ptr0 = son + (_cyclicBufferPos << 1) + 1; |
||
258 | ptr1 = son + (_cyclicBufferPos << 1); |
||
259 | distances[kNumHashBytes] = 0xFFFFFFFF; |
||
260 | len0 = len1 = kNumHashDirectBytes; |
||
261 | count = _cutValue; |
||
262 | for (;;) |
||
263 | { |
||
264 | const byte* pb; |
||
265 | unsigned len,delta; |
||
266 | unsigned cyclicPos; |
||
267 | unsigned* pair; |
||
268 | if (curMatch <= matchMinPos || count--==0) |
||
269 | { |
||
270 | *ptr0 = *ptr1 = 0; |
||
271 | break; |
||
272 | } |
||
273 | pb = _buffer+curMatch; |
||
274 | len = (len0 |
||
275 | do |
||
276 | if (pb[len] != cur[len]) break; |
||
277 | while (++len != lenLimit); |
||
278 | delta = _pos - curMatch; |
||
279 | while (maxLen < len) |
||
280 | distances[++maxLen] = delta-1; |
||
281 | cyclicPos = (delta <= _cyclicBufferPos) ? |
||
282 | (_cyclicBufferPos - delta) : |
||
283 | (_cyclicBufferPos - delta + _cyclicBufferSize); |
||
284 | pair = son + (cyclicPos<<1); |
||
285 | if (len != lenLimit) |
||
286 | { |
||
287 | if (pb[len] < cur[len]) |
||
288 | { |
||
289 | *ptr1 = curMatch; |
||
290 | ptr1 = pair+1; |
||
291 | curMatch = *ptr1; |
||
292 | len1 = len; |
||
293 | } |
||
294 | else |
||
295 | { |
||
296 | *ptr0 = curMatch; |
||
297 | ptr0 = pair; |
||
298 | curMatch = *ptr0; |
||
299 | len0 = len; |
||
300 | } |
||
301 | } |
||
302 | else |
||
303 | { |
||
304 | *ptr1 = pair[0]; |
||
305 | *ptr0 = pair[1]; |
||
306 | break; |
||
307 | } |
||
308 | } |
||
309 | /*if (distances[4] < distances[3]) |
||
310 | distances[3] = distances[4]; |
||
311 | if (distances[3] < distances[2]) |
||
312 | distances[2] = distances[3];*/ |
||
313 | return maxLen; |
||
314 | } |
||
315 | |||
316 | void DummyLongestMatch(void) |
||
317 | { |
||
318 | unsigned lenLimit; |
||
319 | unsigned matchMinPos; |
||
320 | const byte* cur; |
||
321 | unsigned hash2Value,hashValue; |
||
322 | unsigned curMatch; |
||
323 | unsigned* son,*ptr0,*ptr1; |
||
324 | unsigned len0,len1,count; |
||
325 | if (_pos + _matchMaxLen <= _streamPos) |
||
326 | lenLimit = _matchMaxLen; |
||
327 | else |
||
328 | { |
||
329 | lenLimit = _streamPos - _pos; |
||
330 | if (lenLimit < kNumHashBytes) |
||
331 | return; |
||
332 | } |
||
333 | matchMinPos = (_pos > _cyclicBufferSize) ? (_pos - _cyclicBufferSize) : 0; |
||
334 | cur = _buffer+_pos; |
||
335 | hashValue = Hash(cur,&hash2Value); |
||
336 | _hash[kHash2Offset + hash2Value] = _pos; |
||
337 | curMatch = _hash[hashValue]; |
||
338 | _hash[hashValue] = _pos; |
||
339 | son = _hash+kHashSizeSum; |
||
340 | ptr0 = son + (_cyclicBufferPos << 1) + 1; |
||
341 | ptr1 = son + (_cyclicBufferPos << 1); |
||
342 | len0 = len1 = kNumHashDirectBytes; |
||
343 | count = _cutValue; |
||
344 | for (;;) |
||
345 | { |
||
346 | const byte* pb; |
||
347 | unsigned len; |
||
348 | unsigned delta,cyclicPos; |
||
349 | unsigned* pair; |
||
350 | if (curMatch <= matchMinPos || count--==0) |
||
351 | break; |
||
352 | pb = _buffer+curMatch; |
||
353 | len = (len0 |
||
354 | do |
||
355 | if (pb[len] != cur[len]) break; |
||
356 | while (++len != lenLimit); |
||
357 | delta = _pos - curMatch; |
||
358 | cyclicPos = (delta <= _cyclicBufferPos) ? |
||
359 | (_cyclicBufferPos - delta) : |
||
360 | (_cyclicBufferPos - delta + _cyclicBufferSize); |
||
361 | pair = son + (cyclicPos << 1); |
||
362 | if (len != lenLimit) |
||
363 | { |
||
364 | if (pb[len] < cur[len]) |
||
365 | { |
||
366 | *ptr1 = curMatch; |
||
367 | ptr1 = pair+1; |
||
368 | curMatch = *ptr1; |
||
369 | len1 = len; |
||
370 | } |
||
371 | else |
||
372 | { |
||
373 | *ptr0 = curMatch; |
||
374 | ptr0 = pair; |
||
375 | curMatch = *ptr0; |
||
376 | len0 = len; |
||
377 | } |
||
378 | } |
||
379 | else |
||
380 | { |
||
381 | *ptr1 = pair[0]; |
||
382 | *ptr0 = pair[1]; |
||
383 | return; |
||
384 | } |
||
385 | } |
||
386 | *ptr0 = *ptr1 = 0; |
||
387 | } |
||
388 | |||
389 | #ifdef GENERIC_INPUT |
||
390 | // for memory input size is always less than kMaxValForNormalize |
||
391 | static void Normalize(void) |
||
392 | { |
||
393 | unsigned subValue; |
||
394 | unsigned* items; |
||
395 | unsigned i,numItems; |
||
396 | subValue = _pos - _cyclicBufferSize; |
||
397 | items = _hash; |
||
398 | numItems = (kHashSizeSum + _cyclicBufferSize*2); |
||
399 | for (i=0;i |
||
400 | { |
||
401 | unsigned value; |
||
402 | value = items[i]; |
||
403 | if (value <= subValue) |
||
404 | value = 0; |
||
405 | else |
||
406 | value -= subValue; |
||
407 | items[i] = value; |
||
408 | } |
||
409 | ReduceOffsets(subValue); |
||
410 | } |
||
411 | #endif |
||
412 | |||
413 | void MatchFinder_MovePos(void) |
||
414 | { |
||
415 | if (++_cyclicBufferPos == _cyclicBufferSize) |
||
416 | _cyclicBufferPos = 0; |
||
417 | LZInWindow_MovePos(); |
||
418 | #ifdef GENERIC_INPUT |
||
419 | if (_pos == kMaxValForNormalize) |
||
420 | Normalize(); |
||
421 | #endif |
||
422 | }=> |