;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                                 ;;
;; Copyright (C) KolibriOS team 2004-2013. All rights reserved.    ;;
;; Distributed under terms of the GNU General Public License       ;;
;;                                                                 ;;
;;                                                                 ;;
;;         GNU GENERAL PUBLIC LICENSE                              ;;
;;          Version 2, June 1991                                   ;;
;;                                                                 ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Syntax: flat assembler (FASM), 32-bit x86.
; This file converts text between cp866 (the encoding the routines below
; translate to and from) and one of three wire encodings selected by the
; dword [encoding], which indexes get_byte_table and recode_proc.
; Symbols utf8_bytes_rest, utf8_char, pos, textbox_width, text_start, scroll,
; irc_data, window_print, window.flags, FLAG_UPDATED and UTF8 are defined
; outside this file.

;-----------------------------------------------------------------------
; get_next_byte
; Load next byte from the packet, translating to cp866 if necessary.
; In:    esi = pointer to data, edx = limit of data (first address past it)
; Out:   CF = 1 and al = (translated) byte, esi advanced; or
;        CF = 0 if no byte/character could be produced before the limit.
; Clobb: eax (loaded with [encoding] for the dispatch); the UTF-8 variant
;        also clobbers ecx and updates [utf8_bytes_rest] / [utf8_char].
;-----------------------------------------------------------------------
get_next_byte:
        mov     eax, [encoding]
        jmp     [get_byte_table+eax*4]  ; tail-jump: the handler returns to our caller

; cp866 input needs no translation.
get_byte_cp866:
        cmp     esi, edx                ; CF = 1 iff esi < edx (data available)
        jae     .nothing                ; no data: return with CF = 0 from the cmp
        lodsb                           ; lodsb leaves flags alone, so CF = 1 is returned
.nothing:
        ret

; cp1251 input: bytes below 0x80 pass through, the rest go through cp1251_table.
get_byte_cp1251:
        cmp     esi, edx
        jae     .nothing                ; no data: CF = 0 from the cmp
        lodsb
        cmp     al, 0x80
        jb      @f                      ; ASCII range: keep the byte as is
        and     eax, 0x7F               ; eax = byte - 0x80, upper bits cleared -> table index
        mov     al, [cp1251_table+eax]
@@:
        stc                             ; a byte is being returned
.nothing:
        ret

get_byte_utf8:
; UTF8 decoding is slightly complicated.
; One character can occupy one or more bytes.
; The boundary in packets theoretically can be anywhere in data,
; so this procedure keeps internal state between calls and handles
; one byte at a time, looping until a character is read or the packet is over.
; Globally, there are two distinct tasks: decode byte sequence to unicode char
; and convert this unicode char to our base encoding (that is cp866).
; State kept between calls: [utf8_bytes_rest] = continuation bytes still
; expected, [utf8_char] = bits of the character collected so far.
; 1. Check that there is data.
        cmp     esi, edx
        jae     .nothing                ; CF = 0 from the cmp is the "no character" result
; 2. Load byte.
        lodsb
        movzx   ecx, al                 ; ecx = zero-extended copy of the raw byte
; 3. Bytes in an UTF8 sequence can be of any of three types.
; If most significant bit is cleared, sequence is one byte and usual ASCII char.
; First byte of a sequence must be 11xxxxxx, other bytes are 10yyyyyy.
; After the 'and', SF = bit 7 of the byte; PF is set for an even number of
; 1 bits, i.e. for 0xC0 (first byte) and not for 0x80 (continuation byte).
        and     al, 0xC0
        jns     .single_byte
        jp      .first_byte
; 4. This byte is not first in UTF8 sequence.
; 4a. Check that the sequence was started. If not, it is an invalid byte
; and we simply ignore it.
        cmp     [utf8_bytes_rest], 0
        jz      get_byte_utf8
; 4b. Otherwise, it is really the next byte and it gives some more bits of char.
        mov     eax, [utf8_char]
        shl     eax, 6
        lea     eax, [eax+ecx-0x80]     ; append the 6 payload bits (byte minus the 10xxxxxx marker)
; 4c. Decrement the number of bytes remaining in the sequence.
; If it goes to zero, character is read, so return it.
        dec     [utf8_bytes_rest]
        jz      .got_char
        mov     [utf8_char], eax
        jmp     get_byte_utf8
; 5. If the byte is first in UTF8 sequence, calculate the number of leading 1s
; - it equals total number of bytes in the sequence; the remaining bits are
; the leading bits of the character.
.first_byte:
        mov     eax, -1
@@:
        inc     eax
        add     cl, cl                  ; shift the byte left by one; SF = next leading bit
        js      @b
; Here eax = (number of leading 1s) - 1 = number of continuation bytes expected,
; and cl holds the byte shifted left by eax+1 positions.
        mov     [utf8_bytes_rest], eax
        xchg    eax, ecx
        inc     ecx
        shr     al, cl                  ; shift back: al = payload bits of the first byte
        mov     [utf8_char], eax
        jmp     get_byte_utf8
; 6. If the byte is ASCII char, it is the character.
.single_byte:
        xchg    eax, ecx                ; eax = the raw byte, zero-extended
.got_char:
; We got the character, now abandon a possible sequence in progress.
        and     [utf8_bytes_rest], 0
; Now the second task. The unicode character is in eax, and now we shall convert it
; to cp866.
        cmp     eax, 0x80
        jb      .done                   ; ASCII maps to itself; CF = 1 from the cmp
; 0x410-0x43F -> 0x80-0xAF, 0x440-0x44F -> 0xE0-0xEF, 0x401 -> 0xF0, 0x451 -> 0xF1
        cmp     eax, 0x401
        jz      .YO
        cmp     eax, 0x451
        jz      .yo
        cmp     eax, 0x410
        jb      .unrecognized
        cmp     eax, 0x440
        jb      .part1
        cmp     eax, 0x450
        jae     .unrecognized
; al = 0x40..0x4F here; subtracting 0x60 wraps to 0xE0..0xEF and the borrow
; sets CF, which doubles as the "character returned" flag.
        sub     al, (0x40-0xE0) and 0xFF
        ret
.part1:
; al = 0x10..0x3F here; subtracting -0x70 (0x90 as a byte) wraps to 0x80..0xAF
; and the borrow sets CF.
        sub     al, 0x10-0x80
.nothing:                               ; reached by 'jae' with CF = 0 (no data)
.done:                                  ; reached with CF = 1 (character in al)
        ret
.unrecognized:
        mov     al, '?'                 ; no cp866 equivalent handled here
        stc
        ret
.YO:
        mov     al, 0xF0                ; U+0401 -> cp866 0xF0
        stc
        ret
.yo:
        mov     al, 0xF1                ; U+0451 -> cp866 0xF1
        stc
        ret

;-----------------------------------------------------------------------
; print_character
; Put one character into the text buffer at [text_start] and update the
; cursor offset [pos], handling CR, LF and scrolling.
; In:    bl = character
;          13     -> move [pos] back to the start of the current line
;          10     -> move [pos] to the start of the next line
;          0..15  -> (other than 10/13) nothing is stored
;          else   -> store the byte at [text_start]+[pos] and advance
; Out:   FLAG_UPDATED is set in the flags of the window at [window_print].
; Clobb: none (all general registers saved/restored by pusha/popa).
; NOTE(review): divides by [textbox_width]; assumes it is non-zero - confirm.
;-----------------------------------------------------------------------
print_character:
        pusha
        cmp     bl, 13                  ; line beginning
        jne     nobol
; Find the largest multiple of [textbox_width] that is <= [pos].
        mov     ecx, [pos]
        inc     ecx
boll1:
        dec     ecx
        mov     eax, ecx
        xor     edx, edx
        mov     ebx, [textbox_width]
        div     ebx
        test    edx, edx                ; remainder 0 -> ecx is a line start
        jnz     boll1
        mov     [pos], ecx
        jmp     newdata
nobol:
        cmp     bl, 10                  ; line down
        jne     nolf
; Advance [pos] (at least once) until it is a multiple of [textbox_width].
addx1:
        inc     [pos]
        mov     eax, [pos]
        xor     edx, edx
        mov     ecx, [textbox_width]
        div     ecx
        test    edx, edx
        jnz     addx1
        mov     eax, [pos]
        jmp     cm1                     ; check whether the new position needs scrolling
nolf:
no_lf_ret:
        cmp     bl, 15                  ; character
        jbe     newdata                 ; codes 0..15 are not stored
; Shift the character into [irc_data] so it holds the last four bytes printed.
        mov     eax, [irc_data]
        shl     eax, 8
        mov     al, bl
        mov     [irc_data], eax
        mov     eax, [pos]
    ;---- draw data
        pusha
        and     ebx, 0xff
        add     eax, [text_start]
        mov     [eax], bl               ; store the character at text_start + pos
        popa                            ; restores eax/ebx changed above
    ;---- draw data
        mov     eax, [pos]
        inc     eax                     ; eax = candidate new position
cm1:
; ebx = [scroll+4] * [textbox_width]; positions at or beyond it trigger a scroll.
; NOTE(review): [scroll+4] presumably holds the number of text lines - confirm.
        mov     ebx, [scroll+4]
        imul    ebx, [textbox_width]
        cmp     eax, ebx
        jb      noeaxz
; Scroll: move ebx bytes up by one line (source is one line below destination).
; NOTE(review): relies on the direction flag being clear for a forward copy.
        mov     esi, [text_start]
        add     esi, [textbox_width]
        mov     edi, [text_start]
        mov     ecx, ebx
        rep     movsb
; Second copy: ebx bytes from line 61 to line 60 of the buffer.
; NOTE(review): the purpose of this copy and of the constants 60/61 is not
; evident from this file - confirm against the buffer layout.
        mov     esi, [text_start]
        mov     ecx, [textbox_width]
        imul    ecx, 61
        add     esi, ecx
        mov     edi, [text_start]
        mov     ecx, [textbox_width]
        imul    ecx, 60
        add     edi, ecx
        mov     ecx, ebx
        rep     movsb
        mov     eax, ebx
        sub     eax, [textbox_width]    ; new position = start of the last line
noeaxz:
        mov     [pos], eax
newdata:
        mov     eax, [window_print]
        or      [eax + window.flags], FLAG_UPDATED
        popa
        ret

;-----------------------------------------------------------------------
; recode_to_* : convert ecx bytes of cp866 text at esi into the target
; encoding at edi. Normally entered through 'recode' below.
; In:    esi = source, edi = destination, ecx = number of source bytes
; Out:   esi/edi advanced past the consumed/produced bytes, ecx = 0
; Clobb: eax, flags
; Note:  recode_to_utf8 writes two bytes for every source byte >= 0x80, so
;        the destination may need up to 2*ecx bytes.
;-----------------------------------------------------------------------

; cp866 -> cp866: plain copy.
recode_to_cp866:
        rep     movsb
        ret

; cp866 -> cp1251: bytes >= 0x80 are looked up in cp866_table.
recode_to_cp1251:
        xor     eax, eax                ; keep the upper bits of eax zero: eax is used as index
        jecxz   .nothing                ; 'loop' with ecx = 0 would iterate 2^32 times
.loop:
        lodsb
        cmp     al,0x80
        jb      @f
        mov     al, [cp866_table-0x80+eax]      ; table starts at code 0x80
@@:
        stosb
        loop    .loop
.nothing:
        ret

; cp866 -> UTF-8: bytes >= 0x80 become the two-byte sequence from utf8_table.
recode_to_utf8:
        jecxz   .nothing
.loop:
        lodsb
        cmp     al, 0x80
        jb      .single_byte
        and     eax, 0x7F               ; eax = byte - 0x80, upper bits cleared -> table index
        mov     ax, [utf8_table+eax*2]
        stosw                           ; low byte (UTF-8 lead byte) is written first
        loop    .loop
        ret
.single_byte:
        stosb
        loop    .loop
.nothing:
        ret

; Dispatch to the recode routine for the current [encoding].
; Same register contract as the recode_to_* routines above.
recode:
        mov     eax, [encoding]
        jmp     [recode_proc+eax*4]



; Index of the active encoding; used to index both tables below.
encoding        dd      UTF8
; Handlers in encoding-index order: cp866, cp1251, utf8.
recode_proc     dd      recode_to_cp866, recode_to_cp1251, recode_to_utf8
get_byte_table  dd      get_byte_cp866, get_byte_cp1251, get_byte_utf8


; cp1251 -> cp866 for codes 0x80..0xFF (indexed by code - 0x80).
; The trailing comment on each row is the high nibble of the source code;
; '?' marks codes with no translation.
cp1251_table:
  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; 8
  db '?','?','?','?','?',$F9,'?','?' , '?','?','?','?','?','?','?','?' ; 9
  db '?',$F6,$F7,'?',$FD,'?','?','?' , $F0,'?',$F2,'?','?','?','?',$F4 ; A
  db $F8,'?','?','?','?','?','?',$FA , $F1,$FC,$F3,'?','?','?','?',$F5 ; B
  db $80,$81,$82,$83,$84,$85,$86,$87 , $88,$89,$8A,$8B,$8C,$8D,$8E,$8F ; C
  db $90,$91,$92,$93,$94,$95,$96,$97 , $98,$99,$9A,$9B,$9C,$9D,$9E,$9F ; D
  db $A0,$A1,$A2,$A3,$A4,$A5,$A6,$A7 , $A8,$A9,$AA,$AB,$AC,$AD,$AE,$AF ; E
  db $E0,$E1,$E2,$E3,$E4,$E5,$E6,$E7 , $E8,$E9,$EA,$EB,$EC,$ED,$EE,$EF ; F

;     0   1   2   3   4   5   6   7     8   9   A   B   C   D   E   F

; cp866 -> UTF-8 for codes 0x80..0xFF: one word per code (indexed by
; code - 0x80), stored so that the UTF-8 lead byte comes first in memory.
; The table is first filled with a placeholder and then patched at assembly
; time with 'store'; inside 'repeat', % is the 1-based iteration counter.
utf8_table:
        times 80h dw 0x98C3     ; default placeholder (bytes C3 98 = UTF-8 for U+00D8)

; 0x80-0xAF -> 0x90D0-0xBFD0 (bytes D0 90 .. D0 BF, i.e. U+0410..U+043F)
repeat 0x30
        store byte 0xD0 at utf8_table+2*(%-1)
        store byte 0x90+%-1 at utf8_table+2*%-1
end repeat

; 0xE0-0xEF -> 0x80D1-0x8FD1 (bytes D1 80 .. D1 8F, i.e. U+0440..U+044F)
repeat 0x10
        store byte 0xD1 at utf8_table+2*(0xE0-0x80+%-1)
        store byte 0x80+%-1 at utf8_table+2*(0xE0-0x80+%)-1
end repeat

; 0xF0 -> 0x81D0, 0xF1 -> 0x91D1 (bytes D0 81 = U+0401, D1 91 = U+0451)
        store dword 0x91D181D0 at utf8_table+2*(0xF0-0x80)

; cp866 -> cp1251 for codes 0x80..0xFF (indexed by code - 0x80).
; The trailing comment on each row is the high nibble of the source code;
; '?' marks codes with no translation.
cp866_table:
  db $C0,$C1,$C2,$C3,$C4,$C5,$C6,$C7 , $C8,$C9,$CA,$CB,$CC,$CD,$CE,$CF ; 8
  db $D0,$D1,$D2,$D3,$D4,$D5,$D6,$D7 , $D8,$D9,$DA,$DB,$DC,$DD,$DE,$DF ; 9
  db $E0,$E1,$E2,$E3,$E4,$E5,$E6,$E7 , $E8,$E9,$EA,$EB,$EC,$ED,$EE,$EF ; A
  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; B
  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; C
  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; D
  db $F0,$F1,$F2,$F3,$F4,$F5,$F6,$F7 , $F8,$F9,$FA,$FB,$FC,$FD,$FE,$FF ; E
  db $A8,$B8,$AA,$BA,$AF,$BF,$A1,$A2 , $B0,$95,$B7,'?',$B9,$A4,'?','?' ; F

;     0   1   2   3   4   5   6   7     8   9   A   B   C   D   E   F