Subversion Repositories Kolibri OS

Rev

Rev 4060 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  2. ;;                                                                 ;;
  3. ;; Copyright (C) KolibriOS team 2004-2013. All rights reserved.    ;;
  4. ;; Distributed under terms of the GNU General Public License       ;;
  5. ;;                                                                 ;;
  6. ;;                                                                 ;;
  7. ;;         GNU GENERAL PUBLIC LICENSE                              ;;
  8. ;;          Version 2, June 1991                                   ;;
  9. ;;                                                                 ;;
  10. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  11.  
  12.  
  13. get_next_byte:
  14. ; Load the next byte from the packet, translating it to cp866 if necessary.
  15. ; Input: esi = pointer to data, edx = limit of data (bytes are read only while esi < edx).
  16. ; Output: CF set and the (translated) byte in al, or CF cleared if there is no data left.
  17.         mov     eax, [encoding]         ; index into get_byte_table, not range-checked
  18.         jmp     [get_byte_table+eax*4]  ; tail jump: the handler returns to our caller
  19.  
  20. get_byte_cp866:             ; same contract as get_next_byte; the byte is returned untranslated
  21.         cmp     esi, edx        ; CF = 1 exactly when esi < edx
  22.         jae     .nothing        ; no data: return with CF cleared
  23.         lodsb                   ; al = byte; lodsb leaves flags alone, so CF stays set
  24. .nothing:
  25.         ret
  26.  
  27. get_byte_cp1251:            ; same contract as get_next_byte; bytes >= 0x80 go through cp1251_table
  28.         cmp     esi, edx
  29.         jae     .nothing        ; no data: return with CF cleared
  30.         lodsb
  31.         cmp     al, 0x80
  32.         jb      @f              ; 0x00-0x7F are returned unchanged
  33.         and     eax, 0x7F       ; eax = table index with the upper bits cleared
  34.         mov     al, [cp1251_table+eax]
  35. @@:
  36.         stc                     ; cmp al, 0x80 has overwritten CF, so set it explicitly
  37. .nothing:
  38.         ret
  39.  
  40. get_byte_utf8:
  41. ; UTF-8 decoding is slightly complicated.
  42. ; One character can occupy one or more bytes.
  43. ; In theory a packet boundary can fall anywhere in the data,
  44. ; so this procedure keeps internal state between calls and handles
  45. ; one byte at a time, looping until a character is read or the packet is over.
  46. ; Globally, there are two distinct tasks: decode a byte sequence to a unicode char
  47. ; and convert this unicode char to our base encoding (that is cp866).
  48. ; 1. Check that there is data. If there is none, return with CF cleared.
  49.         cmp     esi, edx
  50.         jae     .nothing
  51. ; 2. Load byte.
  52.         lodsb
  53.         movzx   ecx, al         ; ecx = the byte, zero-extended (ecx is not preserved)
  54. ; 3. A byte in a UTF-8 sequence can be of one of three types.
  55. ; If the most significant bit is cleared, the sequence is one byte, a usual ASCII char.
  56. ; The first byte of a longer sequence must be 11xxxxxx, other bytes are 10yyyyyy.
  57.         and     al, 0xC0
  58.         jns     .single_byte    ; bit 7 clear: 0xxxxxxx
  59.         jp      .first_byte     ; 0xC0 has two bits set (PF=1), 0x80 has one (PF=0)
  60. ; 4. This byte is not the first one in a UTF-8 sequence.
  61. ; 4a. Check that a sequence was started. If not, it is an invalid byte
  62. ; and we simply ignore it.
  63.         cmp     [utf8_bytes_rest], 0
  64.         jz      get_byte_utf8
  65. ; 4b. Otherwise, it really is the next byte and it gives six more bits of the char.
  66.         mov     eax, [utf8_char]
  67.         shl     eax, 6
  68.         lea     eax, [eax+ecx-0x80]     ; -0x80 removes the 10 marker bits of the byte
  69. ; 4c. Decrement the number of bytes remaining in the sequence.
  70. ; If it goes to zero, the character is complete, so return it.
  71.         dec     [utf8_bytes_rest]
  72.         jz      .got_char
  73.         mov     [utf8_char], eax
  74.         jmp     get_byte_utf8
  75. ; 5. If the byte is the first in a UTF-8 sequence, count its leading 1s:
  76. ; that is the total number of bytes in the sequence; the bits after them
  77. ; are the leading bits of the character.
  78. .first_byte:
  79.         mov     eax, -1
  80. @@:
  81.         inc     eax
  82.         add     cl, cl          ; shift the next bit of the byte into the sign position
  83.         js      @b
  84.         mov     [utf8_bytes_rest], eax  ; leading 1s minus one = bytes still to come
  85.         xchg    eax, ecx
  86.         inc     ecx             ; ecx = number of left shifts done above
  87.         shr     al, cl          ; undo them: the payload bits are back in the low bits
  88.         mov     [utf8_char], eax
  89.         jmp     get_byte_utf8
  90. ; 6. If the byte is an ASCII char, it is the character itself.
  91. .single_byte:
  92.         xchg    eax, ecx        ; eax = the zero-extended byte
  93. .got_char:
  94. ; We got the character, now abandon a possible sequence in progress.
  95.         and     [utf8_bytes_rest], 0
  96. ; Now the second task. The unicode character is in eax, and now we shall convert it
  97. ; to cp866. Every return below leaves CF set.
  98.         cmp     eax, 0x80
  99.         jb      .done           ; ASCII is returned as is; the compare itself left CF set
  100. ; 0x410-0x43F -> 0x80-0xAF, 0x440-0x44F -> 0xE0-0xEF, 0x401 -> 0xF0, 0x451 -> 0xF1
  101.         cmp     eax, 0x401
  102.         jz      .YO
  103.         cmp     eax, 0x451
  104.         jz      .yo
  105.         cmp     eax, 0x410
  106.         jb      .unrecognized
  107.         cmp     eax, 0x440
  108.         jb      .part1
  109.         cmp     eax, 0x450
  110.         jae     .unrecognized
  111.         sub     al, (0x40-0xE0) and 0xFF        ; al is 0x40-0x4F, minus 0x60 borrows: CF set
  112.         ret
  113. .part1:
  114.         sub     al, 0x10-0x80   ; al is 0x10-0x3F, minus 0x90 borrows: CF set
  115. .nothing:
  116. .done:
  117.         ret
  118. .unrecognized:
  119.         mov     al, '?'         ; no cp866 mapping here for this character
  120.         stc
  121.         ret
  122. .YO:
  123.         mov     al, 0xF0        ; 0x401 -> 0xF0
  124.         stc
  125.         ret
  126. .yo:
  127.         mov     al, 0xF1        ; 0x451 -> 0xF1
  128.         stc
  129.         ret
  130.  
  131.  
  132.  
  133. print_character:
  134. ; Store one character in the text buffer of the window being printed to
  135. ; and advance the output position.
  136. ; in:  bl = character
  137. ;        13 - move [pos] back to the start of the current line
  138. ;        10 - move [pos] to the start of the next line
  139. ;        other codes <= 15 - nothing is stored
  140. ;        anything else - stored at [text_start] + [pos]
  141. ; out: all registers are preserved (pusha/popa); flags are modified
  142. ; The window at [window_print] gets FLAG_UPDATED on every call.
  143. ; [textbox_width] must be nonzero, otherwise div faults.
  144.  
  145.         pusha
  146.  
  147.         cmp     bl, 13          ; line beginning
  148.         jne     nobol
  149.  
  150. ; pos -= pos mod textbox_width, found with a single division
  151.         mov     eax, [pos]
  152.         xor     edx, edx
  153.         mov     ecx, [textbox_width]
  154.         div     ecx             ; edx = pos mod textbox_width
  155.         sub     [pos], edx
  156.         jmp     newdata
  157.   nobol:
  158.  
  159.         cmp     bl, 10          ; line down
  160.         jne     nolf
  161.  
  162. ; eax = (pos / textbox_width + 1) * textbox_width, the start of the next line
  163.         mov     eax, [pos]
  164.         xor     edx, edx
  165.         mov     ecx, [textbox_width]
  166.         div     ecx             ; eax = index of the current line
  167.         inc     eax
  168.         imul    eax, ecx
  169.         jmp     cm1             ; cm1 scrolls if needed and stores eax to [pos]
  170.   nolf:
  171.   no_lf_ret:
  172.  
  173.         cmp     bl, 15          ; character
  174.         jbe     newdata
  175.  
  176. ; keep the last four stored characters in [irc_data], newest in the low byte
  177.         mov     eax, [irc_data]
  178.         shl     eax, 8
  179.         mov     al, bl
  180.         mov     [irc_data], eax
  181.  
  182. ; put the character into the buffer and step to the next position
  183.         mov     eax, [pos]
  184.         mov     edx, [text_start]
  185.         mov     [edx+eax], bl
  186.         inc     eax
  187.   cm1:
  188. ; in: eax = new position; scroll once it reaches [scroll+4] lines of text
  189.         mov     ebx, [scroll+4]
  190.         imul    ebx, [textbox_width]
  191.         cmp     eax, ebx
  192.         jb      noeaxz
  193.  
  194. ; move ebx bytes one line towards the start of the buffer
  195.         mov     esi, [text_start]
  196.         add     esi, [textbox_width]
  197.  
  198.         mov     edi, [text_start]
  199.         mov     ecx, ebx
  200.         rep     movsb
  201.  
  202. ; the same shift for ebx bytes that start 61 lines into the buffer
  203. ; NOTE(review): 60 and 61 are hard-coded and the layout they rely on is not
  204. ; visible here - confirm against the buffer definition
  205.         mov     esi, [text_start]
  206.         mov     ecx, [textbox_width]
  207.         imul    ecx, 61
  208.         add     esi, ecx
  209.  
  210.         mov     edi, [text_start]
  211.         mov     ecx, [textbox_width]
  212.         imul    ecx, 60
  213.         add     edi, ecx
  214.         mov     ecx, ebx
  215.         rep     movsb
  216.  
  217.         mov     eax, ebx        ; go on from the last line of the moved area
  218.         sub     eax, [textbox_width]
  219.   noeaxz:
  220.         mov     [pos], eax
  221.  
  222.   newdata:
  223.         mov     eax, [window_print]
  224.         or      [eax + window.flags], FLAG_UPDATED
  225.  
  226.         popa
  227.         ret
  228.  
  229.  
  230.  
  231. recode_to_cp866:            ; no translation: copy ecx bytes from [esi] to [edi]
  232.         rep     movsb
  233.         ret
  234.  
  235. recode_to_cp1251:           ; copy ecx bytes from [esi] to [edi], bytes >= 0x80 go through cp866_table
  236.         xor     eax, eax        ; only al is loaded below, so eax can serve as a table index
  237.         jecxz   .nothing        ; nothing to do for an empty input
  238.   .loop:
  239.         lodsb
  240.         cmp     al,0x80
  241.         jb      @f              ; 0x00-0x7F are copied unchanged
  242.         mov     al, [cp866_table-0x80+eax]      ; the table's first entry is for code 0x80
  243.     @@: stosb
  244.         loop    .loop
  245.   .nothing:
  246.         ret
  247.  
  248. recode_to_utf8:             ; convert ecx bytes at [esi] to UTF-8 at [edi]; output may be up to 2*ecx bytes
  249.         jecxz   .nothing        ; nothing to do for an empty input
  250.   .loop:
  251.         lodsb
  252.         cmp     al, 0x80
  253.         jb      .single_byte
  254.         and     eax, 0x7F       ; table index; this also clears the upper bits of eax
  255.         mov     ax, [utf8_table+eax*2]
  256.         stosw                   ; every code >= 0x80 becomes two bytes
  257.         loop    .loop
  258.         ret
  259.   .single_byte:
  260.         stosb                   ; 0x00-0x7F are copied unchanged
  261.         loop    .loop
  262.   .nothing:
  263.         ret
  264.  
  265. recode:                     ; run the recode_to_* procedure selected by [encoding]
  266.         mov     eax, [encoding]         ; index into recode_proc, not range-checked
  267.         jmp     [recode_proc+eax*4]     ; tail jump: the handler returns to our caller
  268.  
  269.  
  270.  
  271. encoding        dd      UTF8            ; selected encoding; used as an index into the two tables below
  272. recode_proc     dd      recode_to_cp866, recode_to_cp1251, recode_to_utf8      ; handlers used by recode
  273. get_byte_table  dd      get_byte_cp866, get_byte_cp1251, get_byte_utf8  ; handlers used by get_next_byte
  274.  
  275.  
  276. cp1251_table:               ; used by get_byte_cp1251: entry N is the replacement for byte 0x80+N
  277.   db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; 8
  278.   db '?','?','?','?','?',$F9,'?','?' , '?','?','?','?','?','?','?','?' ; 9
  279.   db '?',$F6,$F7,'?',$FD,'?','?','?' , $F0,'?',$F2,'?','?','?','?',$F4 ; A
  280.   db $F8,'?','?','?','?','?','?',$FA , $F1,$FC,$F3,'?','?','?','?',$F5 ; B
  281.   db $80,$81,$82,$83,$84,$85,$86,$87 , $88,$89,$8A,$8B,$8C,$8D,$8E,$8F ; C
  282.   db $90,$91,$92,$93,$94,$95,$96,$97 , $98,$99,$9A,$9B,$9C,$9D,$9E,$9F ; D
  283.   db $A0,$A1,$A2,$A3,$A4,$A5,$A6,$A7 , $A8,$A9,$AA,$AB,$AC,$AD,$AE,$AF ; E
  284.   db $E0,$E1,$E2,$E3,$E4,$E5,$E6,$E7 , $E8,$E9,$EA,$EB,$EC,$ED,$EE,$EF ; F
  285.  
  286. ;    0   1   2   3   4   5   6   7     8   9   A   B   C   D   E   F
  287. ; Row comments give the high nibble of the source byte, the line above gives the low nibble.
  287.  
  288. utf8_table:                 ; used by recode_to_utf8: word N holds the two output bytes for byte 0x80+N
  289.         times 80h dw 0x98C3     ; default placeholder (stored as bytes C3 98); patched below
  290.  
  291. ; 0x80-0xAF -> 0x90D0-0xBFD0 (as words, i.e. bytes D0 90 .. D0 BF)
  292. repeat 0x30
  293.         store byte 0xD0 at utf8_table+2*(%-1)           ; first byte of entry %-1
  294.         store byte 0x90+%-1 at utf8_table+2*%-1         ; second byte of entry %-1
  295. end repeat
  296.  
  297. ; 0xE0-0xEF -> 0x80D1-0x8FD1 (as words, i.e. bytes D1 80 .. D1 8F)
  298. repeat 0x10
  299.         store byte 0xD1 at utf8_table+2*(0xE0-0x80+%-1)
  300.         store byte 0x80+%-1 at utf8_table+2*(0xE0-0x80+%)-1
  301. end repeat
  302.  
  303. ; 0xF0 -> 0x81D0, 0xF1 -> 0x91D1 (both entries written with one dword store)
  304.         store dword 0x91D181D0 at utf8_table+2*(0xF0-0x80)
  305.  
  306. cp866_table:                ; used by recode_to_cp1251: entry N is the replacement for byte 0x80+N
  307.   db $C0,$C1,$C2,$C3,$C4,$C5,$C6,$C7 , $C8,$C9,$CA,$CB,$CC,$CD,$CE,$CF ; 8
  308.   db $D0,$D1,$D2,$D3,$D4,$D5,$D6,$D7 , $D8,$D9,$DA,$DB,$DC,$DD,$DE,$DF ; 9
  309.   db $E0,$E1,$E2,$E3,$E4,$E5,$E6,$E7 , $E8,$E9,$EA,$EB,$EC,$ED,$EE,$EF ; A
  310.   db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; B
  311.   db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; C
  312.   db '?','?','?','?','?','?','?','?' , '?','?','?','?','?','?','?','?' ; D
  313.   db $F0,$F1,$F2,$F3,$F4,$F5,$F6,$F7 , $F8,$F9,$FA,$FB,$FC,$FD,$FE,$FF ; E
  314.   db $A8,$B8,$AA,$BA,$AF,$BF,$A1,$A2 , $B0,$95,$B7,'?',$B9,$A4,'?','?' ; F
  315.  
  316. ;    0   1   2   3   4   5   6   7     8   9   A   B   C   D   E   F
  317. ; Row comments give the high nibble of the source byte, the line above gives the low nibble.
  317.  
  318.