Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
997 | diamond | 1 | ; in: ebx -> pattern (not empty zero-terminated string), esi -> translation table, |
2 | ; dword [esp+4] = encoding, byte [esp+8] non-zero for whole words only |
||
3 | ; out: edx and ecx -> preprocessed data |
||
4 | ; when search will be done, edx must be pgfree()d |
||
5 | search_string_pre: |
||
6 | ; FSM is used, number of states is limited by 256, so pattern length must be <= 255 |
||
7 | ; anyway, for big patterns FSM uses too many memory, so probably it is not best choice |
||
8 | ; get pattern length, m |
||
9 | or ecx, -1 |
||
10 | @@: |
||
11 | inc ecx |
||
12 | cmp byte [ecx+ebx], 0 |
||
13 | jnz @b |
||
14 | cmp byte [esp+8], 0 |
||
15 | jz @f |
||
16 | inc ecx |
||
17 | inc ecx |
||
18 | @@: |
||
19 | push ecx |
||
20 | ; allocate m*257 bytes for FSM and prefix function |
||
21 | imul ecx, 257 |
||
22 | call xpgalloc |
||
23 | pop ecx |
||
24 | test eax, eax |
||
25 | jnz @f |
||
26 | ret 8 |
||
27 | @@: |
||
28 | shl ecx, 8 |
||
29 | push eax |
||
30 | add eax, ecx |
||
31 | ; calculate prefix function |
||
32 | xor ecx, ecx |
||
33 | mov byte [eax], cl |
||
34 | xor edi, edi |
||
35 | cmp byte [esp+4+8], 0 |
||
36 | jnz .whole.prefixcalc |
||
37 | .prefixcalc: |
||
38 | inc edi |
||
39 | movzx edx, byte [ebx+edi] |
||
40 | mov dl, [esi+edx] |
||
41 | test dl, dl |
||
42 | jz .prefixdone |
||
43 | @@: |
||
44 | push eax |
||
45 | movzx eax, byte [ebx+ecx] |
||
46 | cmp dl, [esi+eax] |
||
47 | pop eax |
||
48 | jz @f |
||
49 | jecxz .prefixint |
||
50 | mov cl, byte [eax+ecx-1] |
||
51 | jmp @b |
||
52 | @@: |
||
53 | inc ecx |
||
54 | .prefixint: |
||
55 | mov [eax+edi], cl |
||
56 | jmp .prefixcalc |
||
57 | .whole.prefixcalc: |
||
58 | inc edi |
||
59 | movzx edx, byte [ebx+edi-1] |
||
60 | mov dl, [esi+edx] |
||
61 | test dl, dl |
||
62 | jz .whole.prefixdone |
||
63 | .whole.prefixloop: |
||
64 | jecxz .whole.testfirst |
||
65 | push eax |
||
66 | movzx eax, byte [ebx+ecx-1] |
||
67 | cmp dl, [esi+eax] |
||
68 | pop eax |
||
69 | jz @f |
||
70 | mov cl, byte [eax+ecx-1] |
||
71 | jmp .whole.prefixloop |
||
72 | .whole.testfirst: |
||
73 | cmp [isspace_table+edx], 0 |
||
74 | jz .whole.prefixint |
||
75 | @@: |
||
76 | inc ecx |
||
77 | .whole.prefixint: |
||
78 | mov [eax+edi], cl |
||
79 | jmp .whole.prefixcalc |
||
80 | .whole.prefixdone: |
||
81 | jecxz @f |
||
82 | push eax |
||
83 | movzx eax, byte [ebx+ecx-1] |
||
84 | mov al, [esi+eax] |
||
85 | cmp [isspace_table+eax], 0 |
||
86 | pop eax |
||
87 | jnz @f |
||
88 | mov cl, byte [eax+ecx-1] |
||
89 | jmp .whole.prefixdone |
||
90 | @@: |
||
91 | inc ecx |
||
92 | mov [eax+edi], cl |
||
93 | .prefixdone: |
||
94 | pop edx |
||
95 | ; create reverse table for encoding+translation |
||
96 | push ebp |
||
97 | movzx ebp, byte [esp+8] |
||
98 | cmp ebp, encodings.unicode |
||
99 | jb @f |
||
100 | xor ebp, ebp ; no translations for Unicode encodings, |
||
101 | ; they must be handled separately by caller |
||
102 | @@: |
||
103 | mov ecx, 256 |
||
104 | @@: |
||
105 | push 0 |
||
106 | loop @b |
||
107 | push ebx eax |
||
108 | mov ebx, esp |
||
109 | shl ebp, 7 |
||
110 | xor eax, eax |
||
111 | .createrev: |
||
112 | dec cl |
||
113 | mov al, cl |
||
114 | jns @f |
||
115 | mov al, byte [encodings.tables+ebp+ecx-80h] |
||
116 | @@: |
||
117 | mov al, [esi+eax] |
||
118 | pushd [ebx+8+eax*4] |
||
119 | pushd ecx |
||
120 | mov [ebx+8+eax*4], esp |
||
121 | jnz .createrev |
||
122 | @@: |
||
123 | dec cl |
||
124 | mov al, [esi+ecx] |
||
125 | pushd [ebx+8+eax*4] |
||
126 | popd [ebx+8+ecx*4] |
||
127 | jnz @b |
||
128 | ; create FSM |
||
129 | xor ecx, ecx |
||
130 | cmp byte [ebx+259*4+8], 0 |
||
131 | mov eax, [ebx] |
||
132 | mov ebx, [ebx+4] |
||
133 | mov edi, edx |
||
134 | jz .fsmcalc |
||
135 | mov esi, isspace_table |
||
136 | push 256/4 |
||
137 | pop ecx |
||
138 | rep movsd |
||
139 | inc ecx |
||
140 | .fsmcalc: |
||
141 | movzx esi, byte [eax+ecx] |
||
142 | push eax |
||
143 | push ecx |
||
144 | push 256/4 |
||
145 | pop ecx |
||
146 | dec esi |
||
147 | js .fsmzero |
||
148 | shl esi, 8 |
||
149 | add esi, edx |
||
150 | rep movsd |
||
151 | jmp .fsmnext |
||
152 | .fsmzero: |
||
153 | cmp byte [esp+261*4+256*8+8], 0 |
||
154 | jnz .whole.fsmzero |
||
155 | xor eax, eax |
||
156 | rep stosd |
||
157 | jmp .fsmnext |
||
158 | .whole.fsmzero: |
||
159 | mov esi, edx |
||
160 | rep movsd |
||
161 | .fsmnext: |
||
162 | pop ecx |
||
163 | movzx esi, byte [ebx] |
||
164 | inc ecx |
||
165 | mov esi, [esp+4+8*256+8+esi*4] |
||
166 | @@: |
||
167 | test esi, esi |
||
168 | jz @f |
||
169 | lodsd |
||
170 | mov [edi-256+eax], cl |
||
171 | mov esi, [esi] |
||
172 | jmp @b |
||
173 | @@: |
||
174 | inc ebx |
||
175 | pop eax |
||
176 | cmp byte [ebx], 0 |
||
177 | jnz .fsmcalc |
||
178 | cmp byte [esp+259*4+256*8+8], 0 |
||
179 | jz .nowholefin |
||
180 | movzx esi, byte [eax+ecx] |
||
181 | push ecx |
||
182 | mov ecx, 256 |
||
183 | push 256/4 |
||
184 | pop ecx |
||
185 | dec esi |
||
186 | shl esi, 8 |
||
187 | add esi, edx |
||
188 | rep movsd |
||
189 | pop ecx |
||
190 | inc ecx |
||
191 | xor eax, eax |
||
192 | .whole.fsmfin: |
||
193 | cmp [isspace_table+eax], ah |
||
194 | jz @f |
||
195 | mov byte [edi-256+eax], cl |
||
196 | @@: |
||
197 | inc al |
||
198 | jnz .whole.fsmfin |
||
199 | .nowholefin: |
||
200 | ; ok, now edx -> FSM, cl = final state |
||
201 | add esp, 8*256+8+4*256 |
||
202 | pop ebp |
||
203 | ret 8=> |