Subversion Repositories Kolibri OS

Rev

Rev 3545 | Rev 4060 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3545 hidnplayr 1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
;;                                                                 ;;
3
;; Copyright (C) KolibriOS team 2004-2013. All rights reserved.    ;;
4
;; Distributed under terms of the GNU General Public License       ;;
5
;;                                                                 ;;
6
;;                                                                 ;;
7
;;         GNU GENERAL PUBLIC LICENSE                              ;;
8
;;          Version 2, June 1991                                   ;;
9
;;                                                                 ;;
10
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11
 
12
 
13
get_next_byte:
; Load the next character from the packet, translating to cp866 if necessary.
; In:   esi = pointer to data
;       edx = limit of data
; Out:  CF set   -> al = (translated) character
;       CF clear -> no character could be read
; [encoding] is used as a dword index into get_byte_table. The selected
; handler is entered with a jump, not a call, so its `ret` returns straight
; to our caller. eax is overwritten with the encoding index before dispatch.
        mov     eax, [encoding]
        jmp     [get_byte_table+eax*4]
19
 
20
get_byte_cp866:
; The data is already cp866: return the next byte unchanged.
; In:   esi = pointer to data, edx = limit of data
; Out:  CF set   -> al = byte read, esi moved past it by lodsb
;       CF clear -> esi >= edx, nothing was read
; There is no explicit stc here on purpose: `cmp esi, edx` leaves CF=1
; exactly when esi < edx (the case where jae is not taken), and lodsb does
; not modify flags, so the carry from the bounds check is the result flag.
        cmp     esi, edx
        jae     .nothing
        lodsb
.nothing:
        ret
26
 
27
get_byte_cp1251:
; Read the next cp1251 byte and translate it to cp866.
; In:   esi = pointer to data, edx = limit of data
; Out:  CF set   -> al = character in cp866, esi moved past it by lodsb
;       CF clear -> esi >= edx, nothing was read
; Bytes below 0x80 are passed through unchanged; bytes 0x80..0xFF are looked
; up in cp1251_table, which holds one entry per byte value starting at 0x80.
        cmp     esi, edx
        jae     .nothing
        lodsb
        cmp     al, 0x80
        jb      @f
        and     eax, 0x7F               ; eax = table index, upper bits cleared
        mov     al, [cp1251_table+eax]
@@:
        stc                             ; a character was produced
.nothing:
        ret
39
 
40
get_byte_utf8:
; Read the next UTF-8 encoded character and translate it to cp866.
; In:   esi = pointer to data, edx = limit of data
; Out:  CF set   -> al = character in cp866 ('?' if it has no mapping here)
;       CF clear -> data ran out before a complete character was read
; Clobbers: ecx; the upper bytes of eax are not meaningful on return.
;
; UTF-8 decoding is slightly complicated: one character can occupy one or
; more bytes, and a packet boundary can theoretically fall anywhere in the
; data. This procedure therefore keeps its state in [utf8_bytes_rest] and
; [utf8_char] between calls and handles one byte at a time, looping until a
; character is complete or the packet is over.
; There are two distinct tasks: decode the byte sequence to a Unicode
; character, then convert that character to our base encoding (cp866).
; 1. Check that there is data left.
        cmp     esi, edx
        jae     .nothing                ; CF=0 here: nothing to return
; 2. Load the byte and keep an unmodified, zero-extended copy in ecx.
        lodsb
        movzx   ecx, al
; 3. A byte in a UTF-8 stream is one of three types:
;    0xxxxxxx - a complete single-byte (ASCII) character,
;    11xxxxxx - the first byte of a multi-byte sequence,
;    10yyyyyy - a continuation byte.
; After masking with 0xC0, SF holds the top bit; PF is set for 0xC0 (two
; bits set) and clear for 0x80 (one bit set), which tells the last two apart.
        and     al, 0xC0
        jns     .single_byte
        jp      .first_byte
; 4. This byte is a continuation byte.
; 4a. Check that a sequence was started. If not, the byte is invalid
; and we simply ignore it.
        cmp     [utf8_bytes_rest], 0
        jz      get_byte_utf8
; 4b. Otherwise append its six payload bits to the character so far
; (ecx-0x80 strips the 10xxxxxx marker).
        mov     eax, [utf8_char]
        shl     eax, 6
        lea     eax, [eax+ecx-0x80]
; 4c. Decrement the number of bytes remaining in the sequence.
; If it reaches zero, the character is complete, so return it.
        dec     [utf8_bytes_rest]
        jz      .got_char
        mov     [utf8_char], eax
        jmp     get_byte_utf8
; 5. First byte of a sequence: count its leading 1 bits by shifting cl left
; until the top bit is clear. eax ends up as (leading ones - 1), which is the
; number of continuation bytes still expected; cl has been shifted left by
; (eax+1) bits, so shifting it back right by the same amount leaves only the
; payload bits of the first byte.
.first_byte:
        mov     eax, -1
@@:
        inc     eax
        add     cl, cl
        js      @b
        mov     [utf8_bytes_rest], eax
        xchg    eax, ecx                ; eax = shifted byte, ecx = count
        inc     ecx
        shr     al, cl
        mov     [utf8_char], eax
        jmp     get_byte_utf8
; 6. If the byte is an ASCII char, it is the character itself.
.single_byte:
        xchg    eax, ecx
.got_char:
; We have a character; abandon any sequence that may be in progress.
        and     [utf8_bytes_rest], 0
; Second task: the Unicode character is in eax, convert it to cp866.
; `jb` is taken with CF=1, so plain ASCII returns with the carry already set.
        cmp     eax, 0x80
        jb      .done
; 0x410-0x43F -> 0x80-0xAF, 0x440-0x44F -> 0xE0-0xEF, 0x401 -> 0xF0, 0x451 -> 0xF1
        cmp     eax, 0x401
        jz      .YO
        cmp     eax, 0x451
        jz      .yo
        cmp     eax, 0x410
        jb      .unrecognized
        cmp     eax, 0x440
        jb      .part1
        cmp     eax, 0x450
        jae     .unrecognized
; al is 0x40..0x4F here; subtracting 0x60 wraps to 0xE0..0xEF and the borrow
; leaves CF=1, which doubles as the "character returned" flag.
        sub     al, (0x40-0xE0) and 0xFF
        ret
.part1:
; al is 0x10..0x3F here; the subtraction wraps to 0x80..0xAF, again with a
; borrow, so CF=1 on the fall-through return below.
        sub     al, 0x10-0x80
.nothing:
.done:
        ret
.unrecognized:
        mov     al, '?'
        stc
        ret
.YO:
        mov     al, 0xF0
        stc
        ret
.yo:
        mov     al, 0xF1
        stc
        ret
130
 
131
 
132
 
133
print_character:
; Process one character for the text buffer at [text_start].
; In:   bl = character
;         13        -> [pos] moves back to the beginning of the current row
;         10        -> [pos] moves forward to the beginning of the next row
;         other values <= 15 -> nothing is stored
;         > 15      -> stored at [text_start]+[pos]; [pos] advances by one
; Out:  nothing; all general-purpose registers are preserved (pusha/popa).
; A row is [textbox_width] characters wide; [textbox_width] must be non-zero
; because it is used as a divisor. window_is_updated is called on every path.

        pusha

        cmp     bl, 13          ; line beginning
        jne     nobol

; Step [pos] backwards until it is a multiple of the row width.
        mov     ecx, [pos]
        inc     ecx
  boll1:
        dec     ecx
        mov     eax, ecx
        xor     edx, edx
        mov     ebx, [textbox_width]
        div     ebx
        test    edx, edx
        jnz     boll1
        mov     [pos], ecx
        jmp     newdata
  nobol:

        cmp     bl, 10          ; line down
        jne     nolf

; Step [pos] forwards (at least once) until it is a multiple of the row width.
   addx1:
        inc     [pos]
        mov     eax, [pos]
        xor     edx, edx
        mov     ecx, [textbox_width]
        div     ecx
        test    edx, edx
        jnz     addx1
        mov     eax, [pos]
        jmp     cm1
  nolf:
  no_lf_ret:

        cmp     bl, 15          ; character
        jbe     newdata

; Shift the character into the low byte of [irc_data], so that dword always
; holds the four most recently stored characters (newest in the low byte).
        mov     eax, [irc_data]
        shl     eax, 8
        mov     al, bl
        mov     [irc_data], eax

; Store the character at the current position.
        mov     eax, [pos]
        add     eax, [text_start]
        mov     [eax], bl

        mov     eax, [pos]
        inc     eax
  cm1:
; eax = candidate new position. If it is still below
; [scroll+4]*[textbox_width] (presumably the number of rows times the row
; width - confirm against the definition of `scroll`), just store it.
        mov     ebx, [scroll+4]
        imul    ebx, [textbox_width]
        cmp     eax, ebx
        jb      noeaxz

; Otherwise scroll: copy ebx bytes from one row past [text_start] down to
; [text_start]. The copy runs forward over overlapping memory and assumes the
; direction flag is clear.
        mov     esi, [text_start]
        add     esi, [textbox_width]

        mov     edi, [text_start]
        mov     ecx, ebx
        rep     movsb

; Second copy of ebx bytes, from row 61 to row 60 of the buffer.
; NOTE(review): the purpose of the fixed 60/61 row offsets is not visible
; here - confirm against the buffer layout before changing.
        mov     esi, [text_start]
        mov     ecx, [textbox_width]
        imul    ecx, 61
        add     esi, ecx

        mov     edi, [text_start]
        mov     ecx, [textbox_width]
        imul    ecx, 60
        add     edi, ecx
        mov     ecx, ebx
        rep     movsb

; Continue at the beginning of the last row.
        mov     eax, ebx
        sub     eax, [textbox_width]
  noeaxz:
        mov     [pos], eax

  newdata:
        call    window_is_updated

        popa
        ret
227
 
228
 
229
 
230
recode_to_cp866:
; Target encoding is cp866, the internal encoding: plain copy.
; In:   esi = source, edi = destination, ecx = number of bytes
; Out:  ecx bytes copied; esi and edi moved past them, ecx = 0
        rep     movsb
        ret
233
 
234
recode_to_cp1251:
; Convert a cp866 string to cp1251.
; In:   esi = source, edi = destination, ecx = number of bytes (may be 0)
; Out:  ecx bytes written to the destination; esi and edi moved past the
;       data, ecx = 0; eax is clobbered.
; Bytes below 0x80 are copied unchanged, the rest are mapped via cp866_table.
        xor     eax, eax                ; keep the upper bits of eax zero so that
                                        ; eax can be used as the table index
        jecxz   .nothing
  .loop:
        lodsb
        cmp     al,0x80
        jb      @f
        mov     al, [cp866_table-0x80+eax]
    @@:
        stosb
        loop    .loop
  .nothing:
        ret
246
 
247
recode_to_utf8:
; Convert a cp866 string to UTF-8.
; In:   esi = source, edi = destination, ecx = number of source bytes (may be 0)
; Out:  converted data written to the destination; esi and edi moved past
;       the data, ecx = 0; eax is clobbered.
; Bytes below 0x80 are written as one byte; every other byte is written as
; the two-byte entry from utf8_table. The destination therefore needs room
; for up to 2*ecx bytes.
        jecxz   .nothing
  .loop:
        lodsb
        cmp     al, 0x80
        jb      .single_byte
        and     eax, 0x7F               ; table index; also discards whatever
                                        ; was left in the upper bits of eax
        mov     ax, [utf8_table+eax*2]
        stosw
        loop    .loop
        ret
  .single_byte:
        stosb
        loop    .loop
  .nothing:
        ret
263
 
264
recode:
; Convert a cp866 string to the currently selected encoding.
; In:   esi = source, edi = destination, ecx = number of bytes
; [encoding] is used as a dword index into recode_proc. The selected routine
; is entered with a jump, so its `ret` returns straight to our caller.
; eax is overwritten with the encoding index before dispatch.
        mov     eax, [encoding]
        jmp     [recode_proc+eax*4]
267
 
268
 
269
 
270
; Currently selected encoding. The value is used directly as an index into
; the two dispatch tables below, so both tables must list their handlers in
; the same order: cp866, cp1251, utf8.
encoding        dd      UTF8
recode_proc     dd      recode_to_cp866, recode_to_cp1251, recode_to_utf8
get_byte_table  dd      get_byte_cp866, get_byte_cp1251, get_byte_utf8
273
 
274
 
275
cp1251_table:
; cp1251 -> cp866 translation for bytes 0x80..0xFF (index = byte - 0x80).
; Each row covers sixteen codes; the row's high nibble is given at the end of
; the line. '?' marks characters with no cp866 equivalent in this table.
;    0   1   2   3   4   5   6   7     8   9   A   B   C   D   E   F
  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; 8
  db '?','?','?','?','?',$F9,'?','?' , '?','?','?','?','?','?','?','?' ; 9
  db '?',$F6,$F7,'?',$FD,'?','?','?' , $F0,'?',$F2,'?','?','?','?',$F4 ; A
  db $F8,'?','?','?','?','?','?',$FA , $F1,$FC,$F3,'?','?','?','?',$F5 ; B
  db $80,$81,$82,$83,$84,$85,$86,$87 , $88,$89,$8A,$8B,$8C,$8D,$8E,$8F ; C
  db $90,$91,$92,$93,$94,$95,$96,$97 , $98,$99,$9A,$9B,$9C,$9D,$9E,$9F ; D
  db $A0,$A1,$A2,$A3,$A4,$A5,$A6,$A7 , $A8,$A9,$AA,$AB,$AC,$AD,$AE,$AF ; E
  db $E0,$E1,$E2,$E3,$E4,$E5,$E6,$E7 , $E8,$E9,$EA,$EB,$EC,$ED,$EE,$EF ; F
286
 
287
utf8_table:
; cp866 -> UTF-8 translation for bytes 0x80..0xFF (index = byte - 0x80).
; Every entry is one word that is written to the output as-is, so its low
; byte is the first UTF-8 byte and its high byte is the second one.
; The table is first filled with a placeholder and then patched at assembly
; time with `store` for the characters that have a mapping.
        times 80h dw 0x98C3     ; default placeholder: bytes C3 98 (U+00D8)

; cp866 0x80-0xAF -> bytes D0 90 .. D0 BF (words 0x90D0-0xBFD0)
repeat 0x30
        store byte 0xD0 at utf8_table+2*(%-1)
        store byte 0x90+%-1 at utf8_table+2*%-1
end repeat

; cp866 0xE0-0xEF -> bytes D1 80 .. D1 8F (words 0x80D1-0x8FD1)
repeat 0x10
        store byte 0xD1 at utf8_table+2*(0xE0-0x80+%-1)
        store byte 0x80+%-1 at utf8_table+2*(0xE0-0x80+%)-1
end repeat

; cp866 0xF0 -> bytes D0 81 (word 0x81D0), 0xF1 -> bytes D1 91 (word 0x91D1)
        store dword 0x91D181D0 at utf8_table+2*(0xF0-0x80)
304
 
305
cp866_table:
; cp866 -> cp1251 translation for bytes 0x80..0xFF (index = byte - 0x80).
; Each row covers sixteen codes; the row's high nibble is given at the end of
; the line. '?' marks characters with no cp1251 equivalent in this table.
;    0   1   2   3   4   5   6   7     8   9   A   B   C   D   E   F
  db $C0,$C1,$C2,$C3,$C4,$C5,$C6,$C7 , $C8,$C9,$CA,$CB,$CC,$CD,$CE,$CF ; 8
  db $D0,$D1,$D2,$D3,$D4,$D5,$D6,$D7 , $D8,$D9,$DA,$DB,$DC,$DD,$DE,$DF ; 9
  db $E0,$E1,$E2,$E3,$E4,$E5,$E6,$E7 , $E8,$E9,$EA,$EB,$EC,$ED,$EE,$EF ; A
  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; B
  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; C
  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; D
  db $F0,$F1,$F2,$F3,$F4,$F5,$F6,$F7 , $F8,$F9,$FA,$FB,$FC,$FD,$FE,$FF ; E
  db $A8,$B8,$AA,$BA,$AF,$BF,$A1,$A2 , $B0,$95,$B7,'?',$B9,$A4,'?','?' ; F
316