; Source: KolibriOS Subversion repository, revision 3981
; (blame view: line 1 last changed in r3545 by hidnplayr).
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                                 ;;
;; Copyright (C) KolibriOS team 2004-2013. All rights reserved.    ;;
;; Distributed under terms of the GNU General Public License       ;;
;;                                                                 ;;
;;                                                                 ;;
;;         GNU GENERAL PUBLIC LICENSE                              ;;
;;          Version 2, June 1991                                   ;;
;;                                                                 ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


get_next_byte:
; Load the next character from the packet, translating it to cp866 if
; necessary.
; In:  esi = pointer to the next unread byte
;      edx = limit of the data (address just past the last byte)
; Out: CF = 1 and al = (translated) character if one was read,
;      CF = 0 if the data ran out.
; eax is always overwritten: it is used as the index into get_byte_table.
; The selected decoder is entered with a jump, not a call, so its "ret"
; returns directly to our caller.
; NOTE(review): [encoding] is not range-checked here; it is assumed to be a
; valid index (0..2) into get_byte_table.
        mov     eax, [encoding]
        jmp     [get_byte_table+eax*4]

get_byte_cp866:
; Decoder for cp866 input: the data are already in the base encoding, so the
; byte is returned as is.
; In:  esi = read pointer, edx = limit of the data
; Out: esi < edx  -> CF = 1, al = byte at [esi], esi stepped by lodsb
;      esi >= edx -> CF = 0, al and esi untouched
; The carry flag is exactly the borrow produced by "cmp esi, edx";
; lodsb does not modify flags, so no explicit stc/clc is needed.
        cmp     esi, edx
        jae     .nothing
        lodsb
.nothing:
        ret

get_byte_cp1251:
; Decoder for cp1251 input.
; In:  esi = read pointer, edx = limit of the data
; Out: esi < edx  -> CF = 1, al = byte translated to cp866, esi stepped by lodsb
;      esi >= edx -> CF = 0, al and esi untouched
; Bytes below 0x80 are returned unchanged; bytes 0x80..0xFF are replaced by
; the corresponding entry of cp1251_table (on that path the upper 24 bits of
; eax are cleared as a side effect of forming the index).
        cmp     esi, edx
        jae     .nothing
        lodsb
        cmp     al, 0x80
        jb      @f
        and     eax, 0x7F               ; eax = 0..0x7F, index into the table
        mov     al, [cp1251_table+eax]
@@:
        stc                             ; "cmp al, 0x80" overwrote the CF left
                                        ; by the bounds check, so set it again
.nothing:
        ret

get_byte_utf8:
; Decoder for UTF-8 input.
; In:  esi = read pointer, edx = limit of the data
; Out: CF = 1, al = character converted to cp866 ('?' if it is not one of the
;              code points handled below)
;      CF = 0 if the data ran out before a character was completed
; eax and ecx are used as scratch; esi is stepped past every byte consumed.
;
; UTF8 decoding is slightly complicated.
; One character can occupy one or more bytes.
; The boundary between packets can theoretically be anywhere in the data,
; so this procedure keeps internal state between calls ([utf8_char] holds the
; bits collected so far, [utf8_bytes_rest] the number of bytes still expected)
; and handles one byte at a time, looping until a character is read or the
; packet is over.
; Globally, there are two distinct tasks: decode the byte sequence to a unicode
; char and convert this unicode char to our base encoding (that is cp866).
; 1. Check that there are data.
        cmp     esi, edx
        jae     .nothing                ; CF = 0 here, which is the "no data" result
; 2. Load byte. ecx keeps the whole byte, al is used for classification.
        lodsb
        movzx   ecx, al
; 3. Bytes in an UTF8 sequence can be of any of three types.
; If the most significant bit is cleared, the sequence is one byte and it is
; a usual ASCII char.
; The first byte of a sequence must be 11xxxxxx, other bytes are 10yyyyyy.
; After "and al, 0xC0": SF = bit 7; PF = 1 only if both top bits are set
; (0xC0 has an even number of ones, 0x80 has an odd number).
        and     al, 0xC0
        jns     .single_byte
        jp      .first_byte
; 4. This byte is not first in UTF8 sequence.
; 4a. Check that the sequence was started. If not, it is an invalid byte
; and we simply ignore it.
        cmp     [utf8_bytes_rest], 0
        jz      get_byte_utf8
; 4b. Otherwise, it is really the next byte and it gives six more bits of
; the char: eax = (utf8_char << 6) + (byte - 0x80).
        mov     eax, [utf8_char]
        shl     eax, 6
        lea     eax, [eax+ecx-0x80]
; 4c. Decrement the number of bytes left in the sequence.
; If it goes to zero, the character is read, so return it.
        dec     [utf8_bytes_rest]
        jz      .got_char
        mov     [utf8_char], eax
        jmp     get_byte_utf8
; 5. If the byte is first in UTF8 sequence, calculate the number of leading 1s
; - it equals the total number of bytes in the sequence; the remaining bits
; are the leading bits of the character.
; The loop shifts cl left until a zero bit reaches the top; eax ends up as
; (number of leading 1s) - 1, i.e. the number of continuation bytes expected.
.first_byte:
        mov     eax, -1
@@:
        inc     eax
        add     cl, cl
        js      @b
        mov     [utf8_bytes_rest], eax
; Now eax = left-shifted byte, ecx = shift already applied; shifting right by
; one more position than that drops the length prefix and its terminating 0,
; leaving the payload bits right-aligned.
        xchg    eax, ecx
        inc     ecx
        shr     al, cl
        mov     [utf8_char], eax
        jmp     get_byte_utf8
; 6. If the byte is ASCII char, it is the character.
.single_byte:
        xchg    eax, ecx
.got_char:
; We got the character, now abandon a possible sequence in progress.
        and     [utf8_bytes_rest], 0
; Now the second task. The unicode character is in eax, and now we shall
; convert it to cp866.
        cmp     eax, 0x80
        jb      .done                   ; ASCII: returned as is; CF = 1 from the cmp
; 0x410-0x43F -> 0x80-0xAF, 0x440-0x44F -> 0xE0-0xEF, 0x401 -> 0xF0, 0x451 -> 0xF1
        cmp     eax, 0x401
        jz      .YO
        cmp     eax, 0x451
        jz      .yo
        cmp     eax, 0x410
        jb      .unrecognized
        cmp     eax, 0x440
        jb      .part1
        cmp     eax, 0x450
        jae     .unrecognized
; al = 0x40..0x4F; subtracting 0x60 wraps to 0xE0..0xEF and the borrow
; leaves CF = 1, which is the "character read" result.
        sub     al, (0x40-0xE0) and 0xFF
        ret
.part1:
; al = 0x10..0x3F; subtracting -0x70 (byte 0x90) gives 0x80..0xAF and the
; borrow leaves CF = 1.
        sub     al, 0x10-0x80
.nothing:
.done:
        ret
.unrecognized:
        mov     al, '?'
        stc
        ret
.YO:
        mov     al, 0xF0
        stc
        ret
.yo:
        mov     al, 0xF1
        stc
        ret



print_character:
; Put one character into the text buffer of the window being printed to.
; In:  bl = character
; Out: nothing; all general-purpose registers are preserved (pusha/popa),
;      flags are not.
; Behaviour by character code:
;   13     - [pos] is moved back to the nearest multiple of [textbox_width]
;            (beginning of the current line)
;   10     - [pos] is advanced to the next multiple of [textbox_width]
;            (beginning of the next line), scrolling if needed
;   0..15  - (other than the two above) not stored
;   others - the byte is shifted into [irc_data], stored at
;            [text_start]+[pos], and [pos] is advanced by one, scrolling
;            if needed
; In every case FLAG_UPDATED is set in the flags of the window [window_print].
; NOTE(review): [textbox_width] is used as a divisor and is assumed non-zero.
; The labels below are global (not dot-local), so they are kept by name in
; case code outside this block refers to them.

        pusha

        cmp     bl, 13          ; line beginning
        jne     nobol

; Step ecx down from [pos] until it is a multiple of the line width.
        mov     ecx, [pos]
        inc     ecx
  boll1:
        dec     ecx
        mov     eax, ecx
        xor     edx, edx
        mov     ebx, [textbox_width]
        div     ebx
        test    edx, edx
        jnz     boll1
        mov     [pos], ecx
        jmp     newdata
  nobol:

        cmp     bl, 10          ; line down
        jne     nolf

; Step [pos] up (at least once) until it is a multiple of the line width.
   addx1:
        inc     [pos]
        mov     eax, [pos]
        xor     edx, edx
        mov     ecx, [textbox_width]
        div     ecx
        test    edx, edx
        jnz     addx1
        mov     eax, [pos]
        jmp     cm1
  nolf:
  no_lf_ret:


        cmp     bl, 15          ; character
        jbe     newdata

; Keep the last four stored characters in [irc_data], newest in the low byte.
        mov     eax, [irc_data]
        shl     eax, 8
        mov     al, bl
        mov     [irc_data], eax

    ;---- draw data
; Store the character at the current position. Only eax is used as scratch
; (it is reloaded right below), so nothing has to be saved around the store.
        mov     eax, [pos]
        add     eax, [text_start]
        mov     [eax], bl
    ;---- draw data

        mov     eax, [pos]
        inc     eax
  cm1:
; eax = candidate new position; ebx = [scroll+4] * [textbox_width] is the
; limit it must stay below.
        mov     ebx, [scroll+4]
        imul    ebx, [textbox_width]
        cmp     eax, ebx
        jb      noeaxz

; Limit reached: move the text up by one line (ebx bytes, starting one line
; into the buffer, are copied to the start of the buffer).
        mov     esi, [text_start]
        add     esi, [textbox_width]

        mov     edi, [text_start]
        mov     ecx, ebx
        rep     movsb

; Second copy: ebx bytes from line 61 of the buffer to line 60.
; NOTE(review): the constants 60/61 are hard-coded and the byte count is the
; same ebx as above; the intent is not visible from this file - confirm.
        mov     esi, [text_start]
        mov     ecx, [textbox_width]
        imul    ecx, 61
        add     esi, ecx

        mov     edi, [text_start]
        mov     ecx, [textbox_width]
        imul    ecx, 60
        add     edi, ecx
        mov     ecx, ebx
        rep     movsb

; Continue at the beginning of the last line below the limit.
        mov     eax, ebx
        sub     eax, [textbox_width]
  noeaxz:
        mov     [pos], eax

  newdata:
        mov     eax, [window_print]
        or      [eax + window.flags], FLAG_UPDATED

        popa
        ret



recode_to_cp866:
; "Recode" cp866 text to cp866: a plain copy.
; In:  esi = source, edi = destination, ecx = number of bytes
; Out: ecx bytes copied by rep movsb; esi and edi are stepped past the data
;      and ecx = 0.
        rep     movsb
        ret

recode_to_cp1251:
; Recode cp866 text to cp1251.
; In:  esi = source, edi = destination, ecx = number of bytes
; Out: ecx bytes written; esi and edi are stepped past the data, ecx = 0.
; eax is clobbered. Bytes below 0x80 are copied unchanged, the others are
; replaced by the matching entry of cp866_table.
        xor     eax, eax                ; upper bits of eax stay zero, so eax
                                        ; can be used directly as an index
        jecxz   .nothing
  .loop:
        lodsb
        cmp     al,0x80
        jb      @f
        mov     al, [cp866_table-0x80+eax]      ; eax = 0x80..0xFF here
    @@: stosb
        loop    .loop
  .nothing:
        ret

recode_to_utf8:
; Recode cp866 text to UTF-8.
; In:  esi = source, edi = destination, ecx = number of source bytes
; Out: esi stepped past the source, edi past the produced output, ecx = 0.
; eax is clobbered.
; Bytes below 0x80 produce one output byte; every byte 0x80..0xFF produces
; the two-byte entry of utf8_table, so the destination must have room for up
; to twice the source length.
        jecxz   .nothing
  .loop:
        lodsb
        cmp     al, 0x80
        jb      .single_byte
        and     eax, 0x7F               ; eax = index of the 2-byte table entry
        mov     ax, [utf8_table+eax*2]
        stosw
        loop    .loop
        ret
  .single_byte:
        stosb
        loop    .loop
  .nothing:
        ret

recode:
; Recode text from cp866 to the currently selected [encoding].
; In:  esi = source, edi = destination, ecx = number of source bytes
; The converter is taken from recode_proc and entered with a jump, so its
; "ret" returns directly to our caller; see the individual recode_to_*
; procedures for the output registers. eax is overwritten with the index.
; NOTE(review): [encoding] is not range-checked here; it is assumed to be a
; valid index (0..2) into recode_proc.
        mov     eax, [encoding]
        jmp     [recode_proc+eax*4]



; Currently selected encoding. Its value is used as an index into the two
; dispatch tables below (see recode and get_next_byte), so both tables must
; list their handlers in the same order: cp866, cp1251, utf8.
; NOTE(review): UTF8 is defined elsewhere; presumably it equals 2 to match
; the third table slot - confirm against its definition.
encoding        dd      UTF8
recode_proc     dd      recode_to_cp866, recode_to_cp1251, recode_to_utf8
get_byte_table  dd      get_byte_cp866, get_byte_cp1251, get_byte_utf8


; cp1251 -> cp866 translation for bytes 0x80..0xFF; used by get_byte_cp1251.
; Entry N is the cp866 byte returned for cp1251 byte 0x80+N; '?' marks bytes
; that are not translated. The row label at the right is the high nibble of
; the source byte, the column legend below gives the low nibble.
cp1251_table:
  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; 8
  db '?','?','?','?','?',$F9,'?','?' , '?','?','?','?','?','?','?','?' ; 9
  db '?',$F6,$F7,'?',$FD,'?','?','?' , $F0,'?',$F2,'?','?','?','?',$F4 ; A
  db $F8,'?','?','?','?','?','?',$FA , $F1,$FC,$F3,'?','?','?','?',$F5 ; B
  db $80,$81,$82,$83,$84,$85,$86,$87 , $88,$89,$8A,$8B,$8C,$8D,$8E,$8F ; C
  db $90,$91,$92,$93,$94,$95,$96,$97 , $98,$99,$9A,$9B,$9C,$9D,$9E,$9F ; D
  db $A0,$A1,$A2,$A3,$A4,$A5,$A6,$A7 , $A8,$A9,$AA,$AB,$AC,$AD,$AE,$AF ; E
  db $E0,$E1,$E2,$E3,$E4,$E5,$E6,$E7 , $E8,$E9,$EA,$EB,$EC,$ED,$EE,$EF ; F

;    0   1   2   3   4   5   6   7     8   9   A   B   C   D   E   F

; cp866 -> UTF-8 translation for bytes 0x80..0xFF; used by recode_to_utf8.
; Entry N is a 16-bit word holding the two UTF-8 bytes for cp866 byte 0x80+N,
; first byte in the low half, so that "stosw" emits them in stream order.
; The words quoted in the comments below are these little-endian values.
utf8_table:
        times 80h dw 0x98C3     ; default placeholder (bytes 0xC3, 0x98)

; The entries that do have a translation are patched in at assembly time.
; Inside "repeat", % is the 1-based iteration counter.

; 0x80-0xAF -> 0x90D0-0xBFD0
repeat 0x30
        store byte 0xD0 at utf8_table+2*(%-1)
        store byte 0x90+%-1 at utf8_table+2*%-1
end repeat

; 0xE0-0xEF -> 0x80D1-0x8FD1
repeat 0x10
        store byte 0xD1 at utf8_table+2*(0xE0-0x80+%-1)
        store byte 0x80+%-1 at utf8_table+2*(0xE0-0x80+%)-1
end repeat

; 0xF0 -> 0x81D0, 0xF1 -> 0x91D1
        store dword 0x91D181D0 at utf8_table+2*(0xF0-0x80)

; cp866 -> cp1251 translation for bytes 0x80..0xFF; used by recode_to_cp1251.
; Entry N is the cp1251 byte written for cp866 byte 0x80+N; '?' marks bytes
; that are not translated. The row label at the right is the high nibble of
; the source byte, the column legend below gives the low nibble.
cp866_table:
  db $C0,$C1,$C2,$C3,$C4,$C5,$C6,$C7 , $C8,$C9,$CA,$CB,$CC,$CD,$CE,$CF ; 8
  db $D0,$D1,$D2,$D3,$D4,$D5,$D6,$D7 , $D8,$D9,$DA,$DB,$DC,$DD,$DE,$DF ; 9
  db $E0,$E1,$E2,$E3,$E4,$E5,$E6,$E7 , $E8,$E9,$EA,$EB,$EC,$ED,$EE,$EF ; A
  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; B
  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; C
  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; D
  db $F0,$F1,$F2,$F3,$F4,$F5,$F6,$F7 , $F8,$F9,$FA,$FB,$FC,$FD,$FE,$FF ; E
  db $A8,$B8,$AA,$BA,$AF,$BF,$A1,$A2 , $B0,$95,$B7,'?',$B9,$A4,'?','?' ; F

;    0   1   2   3   4   5   6   7     8   9   A   B   C   D   E   F