Subversion Repositories Kolibri OS

Rev

Rev 4921 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4921 Serge 1
/* Routine to translate from Japanese characters to Unicode */
2
 
3
/* Copyright (c) 2002 Red Hat Incorporated.
4
   All rights reserved.
5
 
6
   Redistribution and use in source and binary forms, with or without
7
   modification, are permitted provided that the following conditions are met:
8
 
9
     Redistributions of source code must retain the above copyright
10
     notice, this list of conditions and the following disclaimer.
11
 
12
     Redistributions in binary form must reproduce the above copyright
13
     notice, this list of conditions and the following disclaimer in the
14
     documentation and/or other materials provided with the distribution.
15
 
16
     The name of Red Hat Incorporated may not be used to endorse
17
     or promote products derived from this software without specific
18
     prior written permission.
19
 
20
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
24
   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26
   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27
   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
*/
31
 
32
#include 
33
 
34
#ifdef _MB_CAPABLE
35
/* Under Cygwin, the incoming wide character is already given in UTF due
36
   to the requirements of the underlying OS. */
37
#ifndef __CYGWIN__
38
 
39
#include <_ansi.h>
6099 serge 40
#include 
4921 Serge 41
#include 
42
#include "local.h"
43
#include "jp2uc.h"
44
 
45
/* Japanese encoding types supported.  One of these is passed as the
   `type' argument of __jp2uc() to say how its character argument is
   encoded. */
46
#define JP_JIS		1
47
#define JP_SJIS		2
48
#define JP_EUCJP	3
49
 
50
static wint_t
51
_DEFUN (__jp2uc, (c, type), wint_t c _AND int type)
52
{
53
  int index, adj;
54
  unsigned char byte1, byte2;
55
  wint_t ret;
56
 
57
  /* we actually use tables of EUCJP to Unicode.  For JIS, we simply
58
     note that EUCJP is essentially JIS with the top bits on in each
59
     byte and translate to EUCJP.  For SJIS, we do a translation to EUCJP before
60
     accessing the tables. */
61
  switch (type)
62
    {
63
    case JP_JIS:
64
      byte1 = (c >> 8) + 0x80;
65
      byte2 = (c & 0xff) + 0x80;
66
      break;
67
    case JP_EUCJP:
68
      byte1 = (c >> 8);
69
      byte2 = (c & 0xff);
70
      break;
71
    case JP_SJIS:
72
      byte1 = c >> 8;
73
      byte2 = c & 0xff;
74
      if (byte2 <= 0x9e)
75
        {
76
          adj = 0xa1 - 0x22;
77
          byte2 = (byte2 - 31) + 0xa1;
78
        }
79
      else
80
        {
81
          adj = 0xa1 - 0x21;
82
          byte2 = (byte2 - 126) + 0xa1;
83
        }
84
      if (byte1 <= 0x9f)
85
        byte1 = ((byte1 - 112) << 1) + adj;
86
      else
87
        byte1 = ((byte1 - 176) << 1) + adj;
88
      break;
89
    default:
90
      return WEOF;
91
    }
92
 
93
  /* find conversion in jp2uc arrays */
94
 
95
  /* handle larger ranges first */
96
  if (byte1 >= 0xb0 && byte1 <= 0xcf && c <= 0xcfd3)
97
    {
98
      index = (byte1 - 0xb0) * 0xfe + (byte2 - 0xa1);
99
      return b02cf[index];
100
    }
101
  else if (byte1 >= 0xd0 && byte1 <= 0xf4 && c <= 0xf4a6)
102
    {
103
      index = (byte1 - 0xd0) * 0xfe + (byte2 - 0xa1);
104
      return d02f4[index];
105
    }
106
 
107
  /* handle smaller ranges here */
108
  switch (byte1)
109
    {
110
    case 0xA1:
111
      return (wint_t)a1[byte2 - 0xa1];
112
    case 0xA2:
113
      ret = a2[byte2 - 0xa1];
114
      if (ret != 0)
115
	return (wint_t)ret;
116
      break;
117
    case 0xA3:
118
      if (a3[byte2 - 0xa1])
119
	return (wint_t)(0xff00 + (byte2 - 0xa0));
120
      break;
121
    case 0xA4:
122
      if (byte2 <= 0xf3)
123
	return (wint_t)(0x3000 + (byte2 - 0x60));
124
      break;
125
    case 0xA5:
126
      if (byte2 <= 0xf6)
127
	return (wint_t)(0x3000 + byte2);
128
      break;
129
    case 0xA6:
130
      ret = 0;
131
      if (byte2 <= 0xd8)
132
	ret = (wint_t)a6[byte2 - 0xa1];
133
      if (ret != 0)
134
	return ret;
135
      break;
136
    case 0xA7:
137
      ret = 0;
138
      if (byte2 <= 0xf1)
139
	ret = (wint_t)a7[byte2 - 0xa1];
140
      if (ret != 0)
141
	return ret;
142
      break;
143
    case 0xA8:
144
      if (byte2 <= 0xc0)
145
	return (wint_t)a8[byte2 - 0xa1];
146
      break;
147
    default:
148
      return WEOF;
149
    }
150
 
151
  return WEOF;
152
}
153
 
154
wint_t
155
_DEFUN (_jp2uc, (c), wint_t c)
156
{
157
  if (!strcmp (__locale_charset (), "JIS"))
158
    c = __jp2uc (c, JP_JIS);
159
  else if (!strcmp (__locale_charset (), "SJIS"))
160
    c = __jp2uc (c, JP_SJIS);
161
  else if (!strcmp (__locale_charset (), "EUCJP"))
162
    c = __jp2uc (c, JP_EUCJP);
163
  return c;
164
}
165
 
166
#endif /* !__CYGWIN__ */
167
#endif /* _MB_CAPABLE */