Rev 3545 | Rev 4060 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                                 ;;
;; Copyright (C) KolibriOS team 2004-2013. All rights reserved.    ;;
;; Distributed under terms of the GNU General Public License       ;;
;;                                                                 ;;
;;                                                                 ;;
;; GNU GENERAL PUBLIC LICENSE                                      ;;
;; Version 2, June 1991                                            ;;
;;                                                                 ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11 | |||
12 | |||
get_next_byte:
; Fetch the next character of the packet, translated to cp866 if needed.
; In:   esi = read pointer into the data, edx = limit (end) of the data
; Out:  CF = 1 and al = (translated) byte when a character was produced,
;       CF = 0 when no character could be produced.
; The value of [encoding] selects the reader: it is used as an index into
; get_byte_table and control is transferred there (the reader returns
; directly to our caller).
        mov     eax, [encoding]
        jmp     [get_byte_table+eax*4]
19 | |||
get_byte_cp866:
; Reader for data that is already cp866: no translation is applied.
; In:   esi = read pointer, edx = limit of data
; Out:  CF = 1, al = byte, esi advanced by one   - a byte was available
;       CF = 0                                   - esi has reached edx
; The carry flag is produced by the pointer comparison itself:
; esi < edx leaves CF = 1, and lodsb does not modify flags.
        cmp     esi, edx
        jb      .have_data
        ret                             ; esi >= edx, so CF = 0 here
.have_data:
        lodsb                           ; CF still 1 from the cmp above
        ret
26 | |||
get_byte_cp1251:
; Reader for cp1251 data: bytes below 0x80 pass through unchanged, bytes
; 0x80..0xFF are replaced by the entry of cp1251_table at (byte - 0x80).
; In:   esi = read pointer, edx = limit of data
; Out:  CF = 1, al = resulting byte, esi advanced by one
;       CF = 0 when esi has reached edx (nothing is read)
        cmp     esi, edx
        jae     .exhausted              ; CF = 0 on this path
        lodsb
        test    al, al
        jns     .deliver                ; bit 7 clear: no translation needed
        and     eax, 0x7F               ; eax = table index, upper bits cleared
        mov     al, [cp1251_table+eax]
.deliver:
        stc                             ; report "character produced"
.exhausted:
        ret
39 | |||
get_byte_utf8:
; Incremental UTF-8 reader that produces cp866 output.
; In:   esi = read pointer, edx = limit of data
; Out:  CF = 1, al = cp866 byte  - a complete character was decoded
;       CF = 0                   - the data ended before a character was complete
; Clobbers ecx and the remaining bits of eax; esi is advanced past every
; byte consumed.
;
; One character can occupy several bytes and the packet boundary may fall
; anywhere inside it, so the decoder keeps its state between calls:
;   [utf8_char]       - bits of the code point collected so far
;   [utf8_bytes_rest] - number of continuation bytes still expected
; Bytes are processed one at a time until a character is complete or the
; data runs out.
;
; Two tasks are performed: (1) assemble the byte sequence into a Unicode
; code point, (2) map that code point to cp866.
.fetch:
        cmp     esi, edx
        jae     .exhausted              ; CF = 0 on this path
        lodsb
        movzx   ecx, al                 ; ecx = raw byte, zero-extended

; Classify the byte by its two top bits:
;   0xxxxxxx - single-byte (ASCII) character
;   11xxxxxx - first byte of a multi-byte sequence
;   10xxxxxx - continuation byte
        and     al, 0xC0
        cmp     al, 0xC0
        je      .lead
        cmp     al, 0x80
        jne     .ascii

; Continuation byte. Without a sequence in progress it is invalid
; and is silently skipped.
        cmp     [utf8_bytes_rest], 0
        je      .fetch
        mov     eax, [utf8_char]
        shl     eax, 6
        lea     eax, [eax+ecx-0x80]     ; append the 6 payload bits
        dec     [utf8_bytes_rest]
        jz      .decoded                ; that was the last byte of the sequence
        mov     [utf8_char], eax
        jmp     .fetch

; Lead byte. Shift the leading 1 bits out of cl while counting them;
; eax ends up as (number of leading 1s - 1), i.e. the number of
; continuation bytes that must follow.
.lead:
        mov     eax, -1
.count_ones:
        inc     eax
        add     cl, cl
        js      .count_ones
        mov     [utf8_bytes_rest], eax
        xchg    eax, ecx                ; eax = shifted payload, ecx = count
        inc     ecx                     ; payload was shifted left count+1 times
        shr     al, cl                  ; move the payload bits back down
        mov     [utf8_char], eax
        jmp     .fetch

; Single-byte character: the byte is the code point.
.ascii:
        mov     eax, ecx

; A complete code point is in eax. Drop any sequence that was in progress,
; then convert the code point to cp866.
.decoded:
        mov     [utf8_bytes_rest], 0
        cmp     eax, 0x80
        jae     .non_ascii
        stc                             ; ASCII maps to itself
        ret

; Mapping: 0x410-0x43F -> 0x80-0xAF, 0x440-0x44F -> 0xE0-0xEF,
;          0x401 -> 0xF0, 0x451 -> 0xF1, anything else -> '?'
.non_ascii:
        cmp     eax, 0x401
        je      .cap_yo
        cmp     eax, 0x451
        je      .small_yo
        cmp     eax, 0x410
        jb      .unknown
        cmp     eax, 0x450
        jae     .unknown
        cmp     eax, 0x440
        jae     .upper_range
        add     al, 0x80-0x10           ; 0x410-0x43F -> 0x80-0xAF
        stc
        ret
.upper_range:
        add     al, 0xE0-0x40           ; 0x440-0x44F -> 0xE0-0xEF
        stc
        ret
.unknown:
        mov     al, '?'
        stc
        ret
.cap_yo:
        mov     al, 0xF0
        stc
        ret
.small_yo:
        mov     al, 0xF1
        stc
        ret
.exhausted:
        ret
130 | |||
131 | |||
132 | |||
print_character:
; Put one character into the text buffer at [pos] and advance the position.
; In:   bl = character
; Out:  nothing; all general-purpose registers are preserved (pusha/popa).
; Behaviour by character code:
;   13        - [pos] moves back to the start of the current line
;   10        - [pos] moves to the start of the next line
;   0..15     - (other than the above) ignored
;   otherwise - the byte is stored at [text_start]+[pos], shifted into
;               [irc_data], and [pos] is incremented
; A line is [textbox_width] positions long. When the new position reaches
; [scroll+4]*[textbox_width] the buffer contents are moved up by one line.
; window_is_updated is called on every path before returning.
        pusha

        cmp     bl, 13                  ; line beginning
        jne     nobol
boll1:
        mov     ecx, [textbox_width]
        test    ecx, ecx
        jz      newdata                 ; zero width: avoid a divide error
        mov     eax, [pos]
        xor     edx, edx
        div     ecx                     ; edx = column within the current line
        sub     [pos], edx              ; largest multiple of width <= pos
        jmp     newdata

nobol:
        cmp     bl, 10                  ; line down
        jne     nolf
addx1:
        mov     ecx, [textbox_width]
        test    ecx, ecx
        jz      newdata                 ; zero width: avoid a divide error
        mov     eax, [pos]
        xor     edx, edx
        div     ecx                     ; eax = index of the current line
        inc     eax
        mul     ecx                     ; eax = smallest multiple of width > pos
        jmp     cm1

nolf:
no_lf_ret:
        cmp     bl, 15                  ; character
        jbe     newdata

        mov     eax, [irc_data]         ; shift the byte into the [irc_data] dword
        shl     eax, 8
        mov     al, bl
        mov     [irc_data], eax

        mov     eax, [pos]
        mov     edx, [text_start]
        mov     [edx+eax], bl           ; store the character in the buffer
        inc     eax

; eax = candidate new position
cm1:
        mov     ebx, [scroll+4]         ; presumably the number of lines shown - TODO confirm
        imul    ebx, [textbox_width]    ; ebx = positions available
        cmp     eax, ebx
        jb      noeaxz

; The position ran past the end: move the text up by one line.
        mov     esi, [text_start]
        add     esi, [textbox_width]
        mov     edi, [text_start]
        mov     ecx, ebx
        rep movsb

; Second copy: ebx bytes from line 61 to line 60 of the buffer.
; NOTE(review): the purpose of the 60/61 constants is not visible here.
        mov     esi, [text_start]
        mov     ecx, [textbox_width]
        imul    ecx, 61
        add     esi, ecx

        mov     edi, [text_start]
        mov     ecx, [textbox_width]
        imul    ecx, 60
        add     edi, ecx
        mov     ecx, ebx
        rep movsb

        mov     eax, ebx                ; continue at the start of the last line
        sub     eax, [textbox_width]
noeaxz:
        mov     [pos], eax

newdata:
        call    window_is_updated

        popa
        ret
227 | |||
228 | |||
229 | |||
recode_to_cp866:
; Outgoing text in cp866 needs no conversion: plain copy.
; In:   esi = source, edi = destination, ecx = number of bytes
; Out:  esi and edi advanced past the copied data, ecx = 0
        rep movsb
        ret
233 | |||
recode_to_cp1251:
; Copy ecx bytes from esi to edi, converting cp866 to cp1251.
; Bytes below 0x80 are copied unchanged; bytes 0x80..0xFF are replaced by
; the entry of cp866_table at (byte - 0x80).
; In:   esi = source, edi = destination, ecx = number of bytes
; Out:  esi and edi advanced, ecx = 0; eax is clobbered
        xor     eax, eax                ; upper bits stay zero so eax can index the table
        test    ecx, ecx
        jz      .done
.next:
        lodsb
        test    al, al
        jns     .emit                   ; bit 7 clear: no translation
        mov     al, [cp866_table-0x80+eax]
.emit:
        stosb
        dec     ecx
        jnz     .next
.done:
        ret
246 | |||
recode_to_utf8:
; Copy ecx source bytes from esi to edi, converting cp866 to UTF-8.
; Bytes below 0x80 are written as one byte; bytes 0x80..0xFF are written
; as the two-byte entry of utf8_table at index (byte - 0x80), so the
; output can be up to twice as long as the input.
; In:   esi = source, edi = destination, ecx = number of source bytes
; Out:  esi and edi advanced, ecx = 0; eax is clobbered
        test    ecx, ecx
        jz      .done
.next:
        lodsb
        test    al, al
        js      .two_bytes
        stosb                           ; plain one-byte character
        jmp     .advance
.two_bytes:
        and     eax, 0x7F               ; eax = table index
        mov     ax, [utf8_table+eax*2]
        stosw
.advance:
        dec     ecx
        jnz     .next
.done:
        ret
263 | |||
recode:
; Convert a buffer using the converter selected by [encoding]:
; the value indexes recode_proc and control is transferred there
; (the converter returns directly to our caller).
; Registers are passed through untouched except eax; the converters
; visible in this file take esi = source, edi = destination, ecx = length.
        mov     eax, [encoding]
        jmp     [recode_proc+eax*4]
267 | |||
268 | |||
269 | |||
; Currently selected encoding; used as an index into the two tables below.
encoding        dd UTF8

; Converters used by recode, indexed by [encoding].
recode_proc     dd recode_to_cp866
                dd recode_to_cp1251
                dd recode_to_utf8

; Readers used by get_next_byte, indexed by [encoding].
get_byte_table  dd get_byte_cp866
                dd get_byte_cp1251
                dd get_byte_utf8
273 | |||
274 | |||
; cp1251 -> cp866 translation for bytes 0x80..0xFF.
; Entry N holds the cp866 byte for cp1251 byte 0x80+N; '?' marks bytes
; with no translation.
cp1251_table:
        times 0x15 db '?'                                       ; 0x80-0x94
        db 0xF9                                                 ; 0x95
        times 0x0A db '?'                                       ; 0x96-0x9F
        db '?',0xF6,0xF7,'?',0xFD,'?','?','?'                   ; 0xA0-0xA7
        db 0xF0,'?',0xF2,'?','?','?','?',0xF4                   ; 0xA8-0xAF
        db 0xF8,'?','?','?','?','?','?',0xFA                    ; 0xB0-0xB7
        db 0xF1,0xFC,0xF3,'?','?','?','?',0xF5                  ; 0xB8-0xBF
repeat 0x30
        db 0x80+%-1                                             ; 0xC0-0xEF -> 0x80-0xAF
end repeat
repeat 0x10
        db 0xE0+%-1                                             ; 0xF0-0xFF -> 0xE0-0xEF
end repeat
286 | |||
; cp866 -> UTF-8 translation for bytes 0x80..0xFF.
; Entry N is the two-byte UTF-8 sequence for cp866 byte 0x80+N, stored in
; output order (first byte at the lower address). Bytes without a mapping
; get the placeholder word 0x98C3 (bytes C3 98).
utf8_table:
; 0x80-0xAF -> D0 90 .. D0 BF
repeat 0x30
        db 0xD0, 0x90+%-1
end repeat
; 0xB0-0xDF -> placeholder
        times 0x30 dw 0x98C3
; 0xE0-0xEF -> D1 80 .. D1 8F
repeat 0x10
        db 0xD1, 0x80+%-1
end repeat
; 0xF0 -> D0 81, 0xF1 -> D1 91
        db 0xD0, 0x81
        db 0xD1, 0x91
; 0xF2-0xFF -> placeholder
        times 0x0E dw 0x98C3
304 | |||
; cp866 -> cp1251 translation for bytes 0x80..0xFF.
; Entry N holds the cp1251 byte for cp866 byte 0x80+N; '?' marks bytes
; with no translation.
cp866_table:
repeat 0x30
        db 0xC0+%-1                                             ; 0x80-0xAF -> 0xC0-0xEF
end repeat
        times 0x30 db '?'                                       ; 0xB0-0xDF
repeat 0x10
        db 0xF0+%-1                                             ; 0xE0-0xEF -> 0xF0-0xFF
end repeat
        db 0xA8,0xB8,0xAA,0xBA,0xAF,0xBF,0xA1,0xA2              ; 0xF0-0xF7
        db 0xB0,0x95,0xB7,'?',0xB9,0xA4,'?','?'                 ; 0xF8-0xFF
316 |