;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                              ;;
;; Copyright (C) KolibriOS team 2013-2014. All rights reserved. ;;
;; Distributed under terms of the GNU General Public License    ;;
;;                                                              ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

$Revision: 5082 $

; fetch_utf8_char addrspace, offs, char
;
; Decode one UTF-8 sequence at assembly time.
;   addrspace - label of a named address space (declared with "::") that
;               holds the UTF-8 bytes
;   offs      - assembly-time variable: offset of the sequence inside
;               addrspace; advanced past the decoded sequence on exit
;   char      - assembly-time variable that receives the decoded code point
;
; The macro does no bounds checking: the caller must make sure offs points
; inside the data before invoking it.
; A first byte in 0x80..0xBF (stray continuation byte) or >= 0xF8 aborts the
; assembly with "err". Continuation bytes are not validated, only their low
; six bits are used.
macro fetch_utf8_char addrspace, offs, char
{ local first_byte, b
        ; the first byte selects the length of the sequence
        load first_byte byte from addrspace:offs
        if first_byte < 0x80
                ; 0xxxxxxx: single byte, ASCII
                char = first_byte
                offs = offs + 1
        else if first_byte < 0xC0
                ; 10xxxxxx: continuation byte cannot start a sequence
                err Invalid UTF-8 string
        else if first_byte < 0xE0
                ; 110xxxxx 10xxxxxx
                char = first_byte and 0x1F
                load b byte from addrspace:offs + 1
                char = (char shl 6) + (b and 0x3F)
                offs = offs + 2
        else if first_byte < 0xF0
                ; 1110xxxx 10xxxxxx 10xxxxxx
                char = first_byte and 0xF
                load b byte from addrspace:offs + 1
                char = (char shl 6) + (b and 0x3F)
                load b byte from addrspace:offs + 2
                char = (char shl 6) + (b and 0x3F)
                offs = offs + 3
        else if first_byte < 0xF8
                ; 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                char = first_byte and 0x7
                load b byte from addrspace:offs + 1
                char = (char shl 6) + (b and 0x3F)
                load b byte from addrspace:offs + 2
                char = (char shl 6) + (b and 0x3F)
                load b byte from addrspace:offs + 3
                char = (char shl 6) + (b and 0x3F)
                offs = offs + 4
        else
                ; 11111xxx is not a valid UTF-8 lead byte
                err Invalid UTF-8 string
        end if
}
|
; convert_utf8 encoding, text...
;
; Worker macro for all encodings.
;   encoding - name of a one-argument macro that emits the byte(s) for a
;              single code point in the target code page
;   text     - the UTF-8 source data, in any form accepted by "db"
;
; Common part for all encodings: map characters 0-0x7F trivially,
; translate pseudographics, and pass every other code point to the
; encoding macro.
; Pseudographics for the boot screen:
; 0x2500 -> 0xC4, 0x2502 -> 0xB3, 0x250C -> 0xDA, 0x2510 -> 0xBF,
; 0x2514 -> 0xC0, 0x2518 -> 0xD9, 0x252C -> 0xC2, 0x2534 -> 0xC1, 0x2551 -> 0xBA
macro convert_utf8 encoding, [arg]
{ common
  local ..addrspace, offs, char
        offs = 0
        ; assemble the source text into a named virtual address space so that
        ; fetch_utf8_char can read it back byte by byte with "load"
        virtual at 0
        ..addrspace:: db arg
        ..addrspace#.size = $
        end virtual
        ; decode one code point per iteration and emit its translation
        while offs < ..addrspace#.size
                fetch_utf8_char ..addrspace, offs, char
                if char = 0x2500
                        db 0xC4
                else if char = 0x2502
                        db 0xB3
                else if char = 0x250C
                        db 0xDA
                else if char = 0x2510
                        db 0xBF
                else if char = 0x2514
                        db 0xC0
                else if char = 0x2518
                        db 0xD9
                else if char = 0x252C
                        db 0xC2
                else if char = 0x2534
                        db 0xC1
                else if char = 0x2551
                        db 0xBA
                else if char < 0x80
                        ; ASCII is identical in every supported code page
                        db char
                else
                        ; everything else is specific to the code page
                        encoding char
                end if
        end while
}
|
; declare_encoding encoding
;
; Declares a new target encoding named by the argument. It defines:
;   - macro <encoding> [arg]  : converts the UTF-8 data given in arg by
;                               passing it to convert_utf8;
;   - struc <encoding> [arg]  : the same conversion in struc form, for
;                               invocations preceded by a label;
;   - macro <encoding>char    : the per-character handler handed to
;                               convert_utf8, which invokes it with one code
;                               point for every character it does not
;                               translate itself.
; The last definition is intentionally left without a body here: at the use
; sites in this file the declare_encoding line is immediately followed by a
; brace-delimited block, and that block becomes the body of <encoding>char.
; Inside that block the code point is available under the name char.
macro declare_encoding encoding
{
; macro form of the converter
macro encoding [arg]
\{ common convert_utf8 encoding#char, arg \}
; struc form of the converter
struc encoding [arg]
\{ common convert_utf8 encoding#char, arg \}
; header only: the body is the block that follows the invocation
macro encoding#char char
}
|
; Russian: use CP866.
; 0x00-0x7F - trivial map
; 0x410-0x43F -> 0x80-0xAF
; 0x440-0x44F -> 0xE0-0xEF
; 0x401 -> 0xF0, 0x451 -> 0xF1
;
; declare_encoding defines both the macro and the struc form of cp866;
; the block below is the per-character handler. It is invoked with one
; code point in char; any code point outside the ranges listed above
; aborts the assembly.
declare_encoding cp866
{
        if char = 0x401
                db 0xF0
        else if char = 0x451
                db 0xF1
        else if (char < 0x410) | (char > 0x44F)
                err Failed to convert to CP866
        else if char < 0x440
                ; 0x410-0x43F -> 0x80-0xAF
                db char - 0x410 + 0x80
        else
                ; 0x440-0x44F -> 0xE0-0xEF
                db char - 0x440 + 0xE0
        end if
}
|
; Latin-1 encoding
; 0x00-0xFF - trivial map
;
; declare_encoding defines both the macro and the struc form of latin1;
; the block below is the per-character handler. It is invoked with one
; code point in char; code points above 0xFF abort the assembly.
declare_encoding latin1
{
        if char < 0x100
                db char
        else
                err Failed to convert to Latin-1
        end if
}

; CP850 encoding |
declare_encoding cp850 |
{ |
common |
latin1 arg |
} |
|
; CP850 encoding |
macro cp850 [arg] |
{ local offs, char, graph |
offs = 0 |
while 1 |
fetch_utf8_char arg, offs, char, graph |
if char = -1 |
break |
end if |
if graph |
db graph |
else if char < 0x80 |
db char |
else if char = 0xBF |
if char = 0xBF |
db 0xA8 |
else if char = 0xE1 |
db 0xA0 |
157,11 → 142,4 |
else |
err Failed to convert to CP850 |
end if |
end while |
} |
|
struc cp850 [arg] |
{ |
common |
cp850 arg |
} |