Subversion Repositories Kolibri OS

Rev

Rev 4921 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 4921 Rev 6099
1
/* Routine to translate from Japanese characters to Unicode */
1
/* Routine to translate from Japanese characters to Unicode */
2
 
2
 
3
/* Copyright (c) 2002 Red Hat Incorporated.
3
/* Copyright (c) 2002 Red Hat Incorporated.
4
   All rights reserved.
4
   All rights reserved.
5
 
5
 
6
   Redistribution and use in source and binary forms, with or without
6
   Redistribution and use in source and binary forms, with or without
7
   modification, are permitted provided that the following conditions are met:
7
   modification, are permitted provided that the following conditions are met:
8
 
8
 
9
     Redistributions of source code must retain the above copyright
9
     Redistributions of source code must retain the above copyright
10
     notice, this list of conditions and the following disclaimer.
10
     notice, this list of conditions and the following disclaimer.
11
 
11
 
12
     Redistributions in binary form must reproduce the above copyright
12
     Redistributions in binary form must reproduce the above copyright
13
     notice, this list of conditions and the following disclaimer in the
13
     notice, this list of conditions and the following disclaimer in the
14
     documentation and/or other materials provided with the distribution.
14
     documentation and/or other materials provided with the distribution.
15
 
15
 
16
     The name of Red Hat Incorporated may not be used to endorse
16
     The name of Red Hat Incorporated may not be used to endorse
17
     or promote products derived from this software without specific
17
     or promote products derived from this software without specific
18
     prior written permission.
18
     prior written permission.
19
 
19
 
20
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
23
   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
24
   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24
   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26
   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26
   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27
   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
28
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
29
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
*/
30
*/
31
 
31
 
32
#include 
32
#include 
33
 
33
 
34
#ifdef _MB_CAPABLE
34
#ifdef _MB_CAPABLE
35
/* Under Cygwin, the incoming wide character is already given in UTF due
35
/* Under Cygwin, the incoming wide character is already given in UTF due
36
   to the requirements of the underlying OS. */
36
   to the requirements of the underlying OS. */
37
#ifndef __CYGWIN__
37
#ifndef __CYGWIN__
38
 
38
 
39
#include <_ansi.h>
39
#include <_ansi.h>
-
 
40
#include 
40
#include 
41
#include 
41
#include "local.h"
42
#include "local.h"
42
#include "jp2uc.h"
43
#include "jp2uc.h"
43
 
44
 
/* Japanese encoding types supported.  These are the values accepted as
   the TYPE argument of __jp2uc.  */
#define JP_JIS		1
#define JP_SJIS		2
#define JP_EUCJP	3
48
 
49
 
49
static wint_t
50
static wint_t
50
_DEFUN (__jp2uc, (c, type), wint_t c _AND int type)
51
_DEFUN (__jp2uc, (c, type), wint_t c _AND int type)
51
{
52
{
52
  int index, adj;
53
  int index, adj;
53
  unsigned char byte1, byte2;
54
  unsigned char byte1, byte2;
54
  wint_t ret;
55
  wint_t ret;
55
 
56
 
56
  /* we actually use tables of EUCJP to Unicode.  For JIS, we simply
57
  /* we actually use tables of EUCJP to Unicode.  For JIS, we simply
57
     note that EUCJP is essentially JIS with the top bits on in each
58
     note that EUCJP is essentially JIS with the top bits on in each
58
     byte and translate to EUCJP.  For SJIS, we do a translation to EUCJP before
59
     byte and translate to EUCJP.  For SJIS, we do a translation to EUCJP before
59
     accessing the tables. */
60
     accessing the tables. */
60
  switch (type)
61
  switch (type)
61
    {
62
    {
62
    case JP_JIS:
63
    case JP_JIS:
63
      byte1 = (c >> 8) + 0x80;
64
      byte1 = (c >> 8) + 0x80;
64
      byte2 = (c & 0xff) + 0x80;
65
      byte2 = (c & 0xff) + 0x80;
65
      break;
66
      break;
66
    case JP_EUCJP:
67
    case JP_EUCJP:
67
      byte1 = (c >> 8);
68
      byte1 = (c >> 8);
68
      byte2 = (c & 0xff);
69
      byte2 = (c & 0xff);
69
      break;
70
      break;
70
    case JP_SJIS:
71
    case JP_SJIS:
71
      byte1 = c >> 8;
72
      byte1 = c >> 8;
72
      byte2 = c & 0xff;
73
      byte2 = c & 0xff;
73
      if (byte2 <= 0x9e)
74
      if (byte2 <= 0x9e)
74
        {
75
        {
75
          adj = 0xa1 - 0x22;
76
          adj = 0xa1 - 0x22;
76
          byte2 = (byte2 - 31) + 0xa1;
77
          byte2 = (byte2 - 31) + 0xa1;
77
        }
78
        }
78
      else
79
      else
79
        {
80
        {
80
          adj = 0xa1 - 0x21;
81
          adj = 0xa1 - 0x21;
81
          byte2 = (byte2 - 126) + 0xa1;
82
          byte2 = (byte2 - 126) + 0xa1;
82
        }
83
        }
83
      if (byte1 <= 0x9f)
84
      if (byte1 <= 0x9f)
84
        byte1 = ((byte1 - 112) << 1) + adj;
85
        byte1 = ((byte1 - 112) << 1) + adj;
85
      else
86
      else
86
        byte1 = ((byte1 - 176) << 1) + adj;
87
        byte1 = ((byte1 - 176) << 1) + adj;
87
      break;
88
      break;
88
    default:
89
    default:
89
      return WEOF;
90
      return WEOF;
90
    }
91
    }
91
 
92
 
92
  /* find conversion in jp2uc arrays */
93
  /* find conversion in jp2uc arrays */
93
 
94
 
94
  /* handle larger ranges first */
95
  /* handle larger ranges first */
95
  if (byte1 >= 0xb0 && byte1 <= 0xcf && c <= 0xcfd3)
96
  if (byte1 >= 0xb0 && byte1 <= 0xcf && c <= 0xcfd3)
96
    {
97
    {
97
      index = (byte1 - 0xb0) * 0xfe + (byte2 - 0xa1);
98
      index = (byte1 - 0xb0) * 0xfe + (byte2 - 0xa1);
98
      return b02cf[index];
99
      return b02cf[index];
99
    }
100
    }
100
  else if (byte1 >= 0xd0 && byte1 <= 0xf4 && c <= 0xf4a6)
101
  else if (byte1 >= 0xd0 && byte1 <= 0xf4 && c <= 0xf4a6)
101
    {
102
    {
102
      index = (byte1 - 0xd0) * 0xfe + (byte2 - 0xa1);
103
      index = (byte1 - 0xd0) * 0xfe + (byte2 - 0xa1);
103
      return d02f4[index];
104
      return d02f4[index];
104
    }
105
    }
105
 
106
 
106
  /* handle smaller ranges here */    
107
  /* handle smaller ranges here */    
107
  switch (byte1)
108
  switch (byte1)
108
    {
109
    {
109
    case 0xA1:
110
    case 0xA1:
110
      return (wint_t)a1[byte2 - 0xa1];
111
      return (wint_t)a1[byte2 - 0xa1];
111
    case 0xA2:
112
    case 0xA2:
112
      ret = a2[byte2 - 0xa1];
113
      ret = a2[byte2 - 0xa1];
113
      if (ret != 0)
114
      if (ret != 0)
114
	return (wint_t)ret;
115
	return (wint_t)ret;
115
      break;
116
      break;
116
    case 0xA3:
117
    case 0xA3:
117
      if (a3[byte2 - 0xa1])
118
      if (a3[byte2 - 0xa1])
118
	return (wint_t)(0xff00 + (byte2 - 0xa0));
119
	return (wint_t)(0xff00 + (byte2 - 0xa0));
119
      break;
120
      break;
120
    case 0xA4:
121
    case 0xA4:
121
      if (byte2 <= 0xf3)
122
      if (byte2 <= 0xf3)
122
	return (wint_t)(0x3000 + (byte2 - 0x60));
123
	return (wint_t)(0x3000 + (byte2 - 0x60));
123
      break;
124
      break;
124
    case 0xA5:
125
    case 0xA5:
125
      if (byte2 <= 0xf6)
126
      if (byte2 <= 0xf6)
126
	return (wint_t)(0x3000 + byte2);
127
	return (wint_t)(0x3000 + byte2);
127
      break;
128
      break;
128
    case 0xA6:
129
    case 0xA6:
129
      ret = 0;
130
      ret = 0;
130
      if (byte2 <= 0xd8)
131
      if (byte2 <= 0xd8)
131
	ret = (wint_t)a6[byte2 - 0xa1];
132
	ret = (wint_t)a6[byte2 - 0xa1];
132
      if (ret != 0)
133
      if (ret != 0)
133
	return ret;
134
	return ret;
134
      break;
135
      break;
135
    case 0xA7:
136
    case 0xA7:
136
      ret = 0;
137
      ret = 0;
137
      if (byte2 <= 0xf1)
138
      if (byte2 <= 0xf1)
138
	ret = (wint_t)a7[byte2 - 0xa1];
139
	ret = (wint_t)a7[byte2 - 0xa1];
139
      if (ret != 0)
140
      if (ret != 0)
140
	return ret;
141
	return ret;
141
      break;
142
      break;
142
    case 0xA8:
143
    case 0xA8:
143
      if (byte2 <= 0xc0)
144
      if (byte2 <= 0xc0)
144
	return (wint_t)a8[byte2 - 0xa1];
145
	return (wint_t)a8[byte2 - 0xa1];
145
      break;
146
      break;
146
    default:
147
    default:
147
      return WEOF;
148
      return WEOF;
148
    }
149
    }
149
 
150
 
150
  return WEOF; 
151
  return WEOF; 
151
}
152
}
152
 
153
 
153
wint_t
154
wint_t
154
_DEFUN (_jp2uc, (c), wint_t c)
155
_DEFUN (_jp2uc, (c), wint_t c)
155
{
156
{
156
  if (!strcmp (__locale_charset (), "JIS"))
157
  if (!strcmp (__locale_charset (), "JIS"))
157
    c = __jp2uc (c, JP_JIS);
158
    c = __jp2uc (c, JP_JIS);
158
  else if (!strcmp (__locale_charset (), "SJIS"))
159
  else if (!strcmp (__locale_charset (), "SJIS"))
159
    c = __jp2uc (c, JP_SJIS);
160
    c = __jp2uc (c, JP_SJIS);
160
  else if (!strcmp (__locale_charset (), "EUCJP"))
161
  else if (!strcmp (__locale_charset (), "EUCJP"))
161
    c = __jp2uc (c, JP_EUCJP);
162
    c = __jp2uc (c, JP_EUCJP);
162
  return c;
163
  return c;
163
}
164
}
164
 
165
 
165
#endif /* !__CYGWIN__ */
166
#endif /* !__CYGWIN__ */
166
#endif /* _MB_CAPABLE */
167
#endif /* _MB_CAPABLE */