WebSVN – Kolibri OS – Path Comparison – / – /programs/network/ssh/encodings.inc Rev 9105 and /programs/network/ssh/encodings.inc Rev 9106

Regard whitespace Rev 9105 → Rev 9106

 /programs/network/ssh/encodings.inc
 ,0 → 1,292
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;                                                                 ;;
+;; Copyright (C) KolibriOS team 2004-2013. All rights reserved.    ;;
+;; Distributed under terms of the GNU General Public License       ;;
+;;                                                                 ;;
+;;   Written by CleverMouse                                        ;;
+;;                                                                 ;;
+;;         GNU GENERAL PUBLIC LICENSE                              ;;
+;;          Version 2, June 1991                                   ;;
+;;                                                                 ;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Decoder state of get_byte_utf8. It is kept between calls, so a UTF8 sequence
+; that is split across packets can be resumed on the next call.
+uglobal
+utf8_bytes_rest dd ?                    ; continuation bytes still expected in current UTF8 sequence (0 = none in progress)
+utf8_char       dd ?                    ; bits of the current UTF8 character collected so far
+endg
+;get_next_byte:
+;; Load next byte from the packet, translating to cp866 if necessary
+;; At input esi = pointer to data, edx = limit of data
+;; Output: CF set and the (translated) byte in al, or CF cleared if no data is left.
+;        mov     eax, [encoding]
+;        jmp     [get_byte_table+eax*4]
+;
+;get_byte_cp866:
+;        cmp     esi, edx
+;        jae     .nothing
+;        lodsb
+;.nothing:
+;        ret
+;
+;get_byte_cp1251:
+;        cmp     esi, edx
+;        jae     .nothing
+;        lodsb
+;        cmp     al, 0x80
+;        jb      @f
+;        and     eax, 0x7F
+;        mov     al, [cp1251_table+eax]
+;@@:
+;        stc
+;.nothing:
+;        ret
+get_byte_utf8:
+; Decode the next UTF8 character from the packet and translate it to cp866.
+;
+; In:       esi = pointer to the next input byte, edx = limit (end) of the data
+; Out:      CF = 1 and al = cp866 byte ('?' if the character has no mapping), or
+;           CF = 0 if the data ended before a complete character was read
+; State:    utf8_bytes_rest / utf8_char hold an unfinished sequence for the next call
+; Clobbers: eax, ecx, flags; esi is advanced past every consumed byte
+;
+; UTF8 decoding is slightly complicated.
+; One character can occupy one or more bytes.
+; The boundary in packets theoretically can be anywhere in data,
+; so this procedure keeps internal state between calls and handles
+; one byte at a time, looping until character is read or packet is over.
+; Globally, there are two distinct tasks: decode byte sequence to unicode char
+; and convert this unicode char to our base encoding (that is cp866).
+; 1. Check that there are data.
+        cmp     esi, edx
+        jae     .nothing                ; taken only when CF = 0, so .nothing returns "no data"
+; 2. Load byte.
+        lodsb
+        movzx   ecx, al                 ; ecx = the whole byte, al is used for classification
+; 3. Bytes in an UTF8 sequence can be of any of three types.
+; If most significant bit is cleared, sequence is one byte and usual ASCII char.
+; First byte of a sequence must be 11xxxxxx, other bytes are 10yyyyyy.
+; After the AND the result is 0x00/0x40 (SF clear), 0xC0 (two bits set, PF set)
+; or 0x80 (one bit set, PF clear), so SF and PF are enough to tell them apart.
+        and     al, 0xC0
+        jns     .single_byte
+        jp      .first_byte
+; 4. This byte is not first in UTF8 sequence.
+; 4a. Check that the sequence was started. If no, it is invalid byte
+; and we simply ignore it.
+        cmp     [utf8_bytes_rest], 0
+        jz      get_byte_utf8
+; 4b. Otherwise, it is really next byte and it gives some more bits of char.
+        mov     eax, [utf8_char]
+        shl     eax, 6
+        lea     eax, [eax+ecx-0x80]     ; append the six payload bits (byte minus the 10xxxxxx marker)
+; 4c. Decrement number of bytes rest in the sequence.
+; If it goes to zero, character is read, so return it.
+        dec     [utf8_bytes_rest]
+        jz      .got_char
+        mov     [utf8_char], eax
+        jmp     get_byte_utf8
+; 5. If the byte is first in UTF8 sequence, calculate the number of leading 1s
+; - it equals total number of bytes in the sequence; some other bits rest for
+; leading bits in the character.
+.first_byte:
+        mov     eax, -1
+@@:
+        inc     eax
+        add     cl, cl                  ; shift the next leading bit into the sign position
+        js      @b
+; eax = number of leading 1s minus one = continuation bytes still to come.
+        mov     [utf8_bytes_rest], eax
+        xchg    eax, ecx                ; al = byte shifted left by (eax+1), ecx = eax
+        inc     ecx
+        shr     al, cl                  ; shift back: only the payload bits of the first byte remain
+        mov     [utf8_char], eax
+        jmp     get_byte_utf8
+; 6. If the byte is ASCII char, it is the character.
+.single_byte:
+        xchg    eax, ecx
+.got_char:
+; We got the character, now abandon a possible sequence in progress.
+        and     [utf8_bytes_rest], 0
+; Now second task. The unicode character is in eax, and now we shall convert it
+; to cp866.
+        cmp     eax, 0x80
+        jb      .done                   ; ASCII: taken only when CF = 1, so .done returns "byte ready"
+; 0x410-0x43F -> 0x80-0xAF, 0x440-0x44F -> 0xE0-0xEF, 0x401 -> 0xF0, 0x451 -> 0xF1
+        cmp     eax, 0x401
+        jz      .YO
+        cmp     eax, 0x451
+        jz      .yo
+        cmp     eax, 0x410
+        jb      .unrecognized
+        cmp     eax, 0x440
+        jb      .part1
+        cmp     eax, 0x450
+        jb      .part2
+; 0x2500-0x259F -> looked up in cp866_boxes (box drawing and block elements)
+        cmp     eax, 0x25a0
+        jae     .unrecognized
+        sub     eax, 0x2500
+        jb      .unrecognized
+        mov     al, [cp866_boxes+eax]
+; The SUB above did not borrow, so CF is clear at this point. Set it explicitly,
+; otherwise a successfully translated character is reported as "no data".
+        stc
+        ret
+.part1:
+; al is 0x10-0x3F here, so subtracting byte 0x90 borrows and leaves CF = 1.
+        sub     al, 0x10-0x80
+.nothing:
+.done:
+        ret
+.part2:
+; al is 0x40-0x4F here, so subtracting 0x60 borrows and leaves CF = 1.
+        sub     al, (0x40-0xE0) and 0xFF
+        ret
+.unrecognized:
+        mov     al, '?'
+        stc
+        ret
+.YO:
+        mov     al, 0xF0
+        stc
+        ret
+.yo:
+        mov     al, 0xF1
+        stc
+        ret
+;recode_to_cp866:
+;        rep     movsb
+;        ret
+;
+;recode_to_cp1251:
+;        xor     eax, eax
+;        jecxz   .nothing
+;  .loop:
+;        lodsb
+;        cmp     al,0x80
+;        jb      @f
+;        mov     al, [cp866_table-0x80+eax]
+;    @@: stosb
+;        loop    .loop
+;  .nothing:
+;        ret
+recode_to_utf8:
+; Convert a cp866 string to UTF8.
+;
+; In:       esi = source bytes, edi = destination buffer, ecx = number of source bytes
+; Out:      esi / edi advanced past the data read / written, ecx = 0
+; Clobbers: eax
+; Every source byte produces one byte (below 0x80) or two bytes (0x80 and up,
+; taken from utf8_table), so the destination may need up to 2*ecx bytes.
+; NOTE(review): the string instructions assume the direction flag is clear - confirm at the call sites.
+        jecxz   .finished               ; empty input: nothing to convert
+  .next_byte:
+        lodsb
+        cmp     al, 0x80
+        jae     .two_bytes
+; ASCII is copied unchanged.
+        stosb
+        loop    .next_byte
+  .finished:
+        ret
+  .two_bytes:
+; Codes 0x80-0xFF: index the table with the low seven bits and emit the
+; stored pair of UTF8 bytes.
+        and     eax, 0x7F
+        mov     ax, [utf8_table+eax*2]
+        stosw
+        loop    .next_byte
+        ret
+;recode:
+;        mov     eax, [encoding]
+;        jmp     [recode_proc+eax*4]
+;encoding        dd      UTF8
+;recode_proc     dd      recode_to_cp866, recode_to_cp1251, recode_to_utf8
+;get_byte_table  dd      get_byte_cp866, get_byte_cp1251, get_byte_utf8
+;cp1251_table:
+;  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; 8
+;  db '?','?','?','?','?',$F9,'?','?' , '?','?','?','?','?','?','?','?' ; 9
+;  db '?',$F6,$F7,'?',$FD,'?','?','?' , $F0,'?',$F2,'?','?','?','?',$F4 ; A
+;  db $F8,'?','?','?','?','?','?',$FA , $F1,$FC,$F3,'?','?','?','?',$F5 ; B
+;  db $80,$81,$82,$83,$84,$85,$86,$87 , $88,$89,$8A,$8B,$8C,$8D,$8E,$8F ; C
+;  db $90,$91,$92,$93,$94,$95,$96,$97 , $98,$99,$9A,$9B,$9C,$9D,$9E,$9F ; D
+;  db $A0,$A1,$A2,$A3,$A4,$A5,$A6,$A7 , $A8,$A9,$AA,$AB,$AC,$AD,$AE,$AF ; E
+;  db $E0,$E1,$E2,$E3,$E4,$E5,$E6,$E7 , $E8,$E9,$EA,$EB,$EC,$ED,$EE,$EF ; F
+;    0   1   2   3   4   5   6   7     8   9   A   B   C   D   E   F
+; cp866 -> UTF8 lookup used by recode_to_utf8: one word per cp866 code 0x80-0xFF,
+; indexed by (code - 0x80). The low byte of the word is the first UTF8 byte and
+; the high byte is the second one, which is the order stosw writes them in.
+; The table is first filled with a placeholder and then patched at assembly
+; time with the store directives below.
+utf8_table:
+        times 80h dw 0x98C3     ; default placeholder (bytes C3 98) for codes that are not patched below
+; 0x80-0xAF -> 0x90D0-0xBFD0
+repeat 0x30
+        store byte 0xD0 at utf8_table+2*(%-1)           ; first UTF8 byte of entry %-1
+        store byte 0x90+%-1 at utf8_table+2*%-1         ; second UTF8 byte of entry %-1
+end repeat
+; 0xE0-0xEF -> 0x80D1-0x8FD1
+repeat 0x10
+        store byte 0xD1 at utf8_table+2*(0xE0-0x80+%-1)         ; first UTF8 byte
+        store byte 0x80+%-1 at utf8_table+2*(0xE0-0x80+%)-1     ; second UTF8 byte
+end repeat
+; 0xF0 -> 0x81D0, 0xF1 -> 0x91D1
+; (one dword patches both adjacent entries: bytes D0 81, D1 91)
+        store dword 0x91D181D0 at utf8_table+2*(0xF0-0x80)
+;cp866_table:
+;  db $C0,$C1,$C2,$C3,$C4,$C5,$C6,$C7 , $C8,$C9,$CA,$CB,$CC,$CD,$CE,$CF ; 8
+;  db $D0,$D1,$D2,$D3,$D4,$D5,$D6,$D7 , $D8,$D9,$DA,$DB,$DC,$DD,$DE,$DF ; 9
+;  db $E0,$E1,$E2,$E3,$E4,$E5,$E6,$E7 , $E8,$E9,$EA,$EB,$EC,$ED,$EE,$EF ; A
+;  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; B
+;  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; C
+;  db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; D
+;  db $F0,$F1,$F2,$F3,$F4,$F5,$F6,$F7 , $F8,$F9,$FA,$FB,$FC,$FD,$FE,$FF ; E
+;  db $A8,$B8,$AA,$BA,$AF,$BF,$A1,$A2 , $B0,$95,$B7,'?',$B9,$A4,'?','?' ; F
+;    0   1   2   3   4   5   6   7     8   9   A   B   C   D   E   F
+; Codepoints for 0xB0-0xDF, unicode offset 0x2500
+; Lookup used by get_byte_utf8: the index is (unicode codepoint - 0x2500), the
+; value is the cp866 byte. The table covers 0x2500-0x259F; every entry that is
+; not patched below stays '?'. The comment on each line gives the unicode
+; codepoint that the entry corresponds to.
+cp866_boxes:
+        times 0xA0 db '?'
+        store byte 0xB0 at cp866_boxes+0x91     ; U+2591
+        store byte 0xB1 at cp866_boxes+0x92     ; U+2592
+        store byte 0xB2 at cp866_boxes+0x93     ; U+2593
+        store byte 0xB3 at cp866_boxes+0x02     ; U+2502
+        store byte 0xB4 at cp866_boxes+0x24     ; U+2524
+        store byte 0xB5 at cp866_boxes+0x61     ; U+2561
+        store byte 0xB6 at cp866_boxes+0x62     ; U+2562
+        store byte 0xB7 at cp866_boxes+0x56     ; U+2556
+        store byte 0xB8 at cp866_boxes+0x55     ; U+2555
+        store byte 0xB9 at cp866_boxes+0x63     ; U+2563
+        store byte 0xBA at cp866_boxes+0x51     ; U+2551
+        store byte 0xBB at cp866_boxes+0x57     ; U+2557
+        store byte 0xBC at cp866_boxes+0x5D     ; U+255D
+        store byte 0xBD at cp866_boxes+0x5C     ; U+255C
+        store byte 0xBE at cp866_boxes+0x5B     ; U+255B
+        store byte 0xBF at cp866_boxes+0x10     ; U+2510
+        store byte 0xC0 at cp866_boxes+0x14     ; U+2514
+        store byte 0xC1 at cp866_boxes+0x34     ; U+2534
+        store byte 0xC2 at cp866_boxes+0x2C     ; U+252C
+        store byte 0xC3 at cp866_boxes+0x1C     ; U+251C
+        store byte 0xC4 at cp866_boxes+0x00     ; U+2500
+        store byte 0xC5 at cp866_boxes+0x3C     ; U+253C
+        store byte 0xC6 at cp866_boxes+0x5E     ; U+255E
+        store byte 0xC7 at cp866_boxes+0x5F     ; U+255F
+        store byte 0xC8 at cp866_boxes+0x5A     ; U+255A
+        store byte 0xC9 at cp866_boxes+0x54     ; U+2554
+        store byte 0xCA at cp866_boxes+0x69     ; U+2569
+        store byte 0xCB at cp866_boxes+0x66     ; U+2566
+        store byte 0xCC at cp866_boxes+0x60     ; U+2560
+        store byte 0xCD at cp866_boxes+0x50     ; U+2550
+        store byte 0xCE at cp866_boxes+0x6C     ; U+256C
+        store byte 0xCF at cp866_boxes+0x67     ; U+2567
+        store byte 0xD0 at cp866_boxes+0x68     ; U+2568
+        store byte 0xD1 at cp866_boxes+0x64     ; U+2564
+        store byte 0xD2 at cp866_boxes+0x65     ; U+2565
+        store byte 0xD3 at cp866_boxes+0x59     ; U+2559
+        store byte 0xD4 at cp866_boxes+0x58     ; U+2558
+        store byte 0xD5 at cp866_boxes+0x52     ; U+2552
+        store byte 0xD6 at cp866_boxes+0x53     ; U+2553
+        store byte 0xD7 at cp866_boxes+0x6B     ; U+256B
+        store byte 0xD8 at cp866_boxes+0x6A     ; U+256A
+        store byte 0xD9 at cp866_boxes+0x18     ; U+2518
+        store byte 0xDA at cp866_boxes+0x0C     ; U+250C
+        store byte 0xDB at cp866_boxes+0x88     ; U+2588
+        store byte 0xDC at cp866_boxes+0x84     ; U+2584
+        store byte 0xDD at cp866_boxes+0x8C     ; U+258C
+        store byte 0xDE at cp866_boxes+0x90     ; U+2590
+        store byte 0xDF at cp866_boxes+0x80     ; U+2580

Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 9105 → Rev 9106