Subversion Repositories Kolibri OS

Rev

Rev 1693 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 1693 Rev 3065
Line 88... Line 88...
88
/usr/share/locale/locale.alias.
88
/usr/share/locale/locale.alias.
Line 89... Line 89...
89
 
89
 
90
(<<"">> is also accepted; if given, the settings are read from the
90
(<<"">> is also accepted; if given, the settings are read from the
Line 91... Line 91...
91
corresponding LC_* environment variables and $LANG according to POSIX rules.
91
corresponding LC_* environment variables and $LANG according to POSIX rules.
92
 
-
 
93
This implementation also supports a single modifier, <<"cjknarrow">>.
-
 
94
Any other modifier is ignored.  <<"cjknarrow">>, in conjunction with one
92
 
95
of the language specifiers <<"ja">>, <<"ko">>, and <<"zh">> specifies
93
This implementation also supports the modifier <<"cjknarrow">>, which
96
how the functions <<wcwidth>> and <<wcswidth>> handle characters from
94
affects how the functions <<wcwidth>> and <<wcswidth>> handle characters
97
the "CJK Ambiguous Width" character class described in
95
from the "CJK Ambiguous Width" category of characters described at
98
http://www.unicode.org/unicode/reports/tr11/.  Usually these characters
96
http://www.unicode.org/reports/tr11/#Ambiguous. These characters have a width
99
have a width of 1, unless you specify one of the aforementioned
97
of 1 for singlebyte charsets and a width of 2 for multibyte charsets
100
languages, in which case these characters have a width of 2.  By
98
other than UTF-8. For UTF-8, their width depends on the language specifier:
-
 
99
it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean),
Line 101... Line 100...
101
specifying the <<"cjknarrow">> modifier, these characters will have a
100
and 1 for everything else. Specifying <<"cjknarrow">> forces a width of 1,
102
width of one in the languages <<"ja">>, <<"ko">>, and <<"zh">> as well.
101
independent of charset and language.
103
 
102
 
104
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
103
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
Line 181... Line 180...
181
#include 
180
#include 
182
#include "lmessages.h"
181
#include "lmessages.h"
183
#include "lmonetary.h"
182
#include "lmonetary.h"
184
#include "lnumeric.h"
183
#include "lnumeric.h"
185
#include "lctype.h"
184
#include "lctype.h"
-
 
185
#include "timelocal.h"
186
#include "../stdlib/local.h"
186
#include "../stdlib/local.h"
Line 187... Line 187...
187
 
187
 
188
#define _LC_LAST      7
188
#define _LC_LAST      7
Line 234... Line 234...
234
 * Current locales for each category
234
 * Current locales for each category
235
 */
235
 */
236
static char current_categories[_LC_LAST][ENCODING_LEN + 1] = {
236
static char current_categories[_LC_LAST][ENCODING_LEN + 1] = {
237
    "C",
237
    "C",
238
    "C",
238
    "C",
-
 
239
#ifdef __CYGWIN__ /* Cygwin starts with LC_CTYPE set to "C.UTF-8". */
-
 
240
    "C.UTF-8",
-
 
241
#else
239
    "C",
242
    "C",
-
 
243
#endif
240
    "C",
244
    "C",
241
    "C",
245
    "C",
242
    "C",
246
    "C",
243
    "C",
247
    "C",
244
};
248
};
Line 254... Line 258...
254
static char *loadlocale(struct _reent *, int);
258
static char *loadlocale(struct _reent *, int);
255
static const char *__get_locale_env(struct _reent *, int);
259
static const char *__get_locale_env(struct _reent *, int);
Line 256... Line 260...
256
 
260
 
Line 257... Line 261...
257
#endif /* _MB_CAPABLE */
261
#endif /* _MB_CAPABLE */
258
 
262
 
259
#if 0 /*def __CYGWIN__  TODO: temporarily(?) disable C == UTF-8 */
-
 
260
static char lc_ctype_charset[ENCODING_LEN + 1] = "UTF-8";
263
#ifdef __CYGWIN__
261
static char lc_message_charset[ENCODING_LEN + 1] = "UTF-8";
264
static char lc_ctype_charset[ENCODING_LEN + 1] = "UTF-8";
262
#else
-
 
263
static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII";
265
#else
-
 
266
static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII";
264
static char lc_message_charset[ENCODING_LEN + 1] = "ASCII";
267
#endif
Line 265... Line 268...
265
#endif
268
static char lc_message_charset[ENCODING_LEN + 1] = "ASCII";
266
static int lc_ctype_cjk_lang = 0;
269
static int lc_ctype_cjk_lang = 0;
267
 
270
 
Line 431... Line 434...
431
#endif /* _MB_CAPABLE */
434
#endif /* _MB_CAPABLE */
Line 432... Line 435...
432
 
435
 
433
#ifdef _MB_CAPABLE
436
#ifdef _MB_CAPABLE
434
#ifdef __CYGWIN__
437
#ifdef __CYGWIN__
435
extern void __set_charset_from_locale (const char *locale, char *charset);
438
extern void __set_charset_from_locale (const char *locale, char *charset);
436
extern int __set_locale_from_locale_alias (const char *, char *);
439
extern char *__set_locale_from_locale_alias (const char *, char *);
437
extern int __collate_load_locale (const char *, void *, const char *);
440
extern int __collate_load_locale (const char *, void *, const char *);
Line 438... Line 441...
438
#endif /* __CYGWIN__ */
441
#endif /* __CYGWIN__ */
Line 451... Line 454...
451
     is extracted and stored in lc_ctype_charset or lc_message_charset
454
     is extracted and stored in lc_ctype_charset or lc_message_charset
452
     dependent on the category. */
455
     dependent on the category. */
453
  char *locale = NULL;
456
  char *locale = NULL;
454
  char charset[ENCODING_LEN + 1];
457
  char charset[ENCODING_LEN + 1];
455
  unsigned long val;
458
  unsigned long val;
456
  char *end, *c;
459
  char *end, *c = NULL;
457
  int mbc_max;
460
  int mbc_max;
458
  int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *);
461
  int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *);
459
  int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t,
462
  int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t,
460
		   const char *, mbstate_t *);
463
		   const char *, mbstate_t *);
461
  int cjknarrow = 0;
464
  int cjknarrow = 0;
Line 494... Line 497...
494
  
497
  
495
  /* "POSIX" is translated to "C", as on Linux. */
498
  /* "POSIX" is translated to "C", as on Linux. */
496
  if (!strcmp (locale, "POSIX"))
499
  if (!strcmp (locale, "POSIX"))
497
    strcpy (locale, "C");
500
    strcpy (locale, "C");
498
  if (!strcmp (locale, "C"))				/* Default "C" locale */
-
 
499
#if 0 /*def __CYGWIN__  TODO: temporarily(?) disable C == UTF-8 */
-
 
500
    strcpy (charset, "UTF-8");
-
 
501
#else
501
  if (!strcmp (locale, "C"))				/* Default "C" locale */
502
    strcpy (charset, "ASCII");
-
 
503
#endif
502
    strcpy (charset, "ASCII");
504
  else if (locale[0] == 'C'
503
  else if (locale[0] == 'C'
505
	   && (locale[1] == '-'		/* Old newlib style */
504
	   && (locale[1] == '-'		/* Old newlib style */
506
	       || locale[1] == '.'))	/* Extension for the C locale to allow
505
	       || locale[1] == '.'))	/* Extension for the C locale to allow
507
					   specifying different charsets while
506
					   specifying different charsets while
508
					   sticking to the C locale in terms
507
					   sticking to the C locale in terms
509
					   of sort order, etc.  Proposed in
508
					   of sort order, etc.  Proposed in
-
 
509
					   the Debian project. */
-
 
510
    {
-
 
511
      char *chp;
-
 
512
 
510
					   the Debian project. */
513
      c = locale + 2;
-
 
514
      strcpy (charset, c);
-
 
515
      if ((chp = strchr (charset, '@')))
-
 
516
        /* Strip off modifier */
-
 
517
        *chp = '\0';
-
 
518
      c += strlen (charset);
511
    strcpy (charset, locale + 2);
519
    }
512
  else							/* POSIX style */
520
  else							/* POSIX style */
513
    {
521
    {
Line 514... Line 522...
514
      c = locale;
522
      c = locale;
Line 557... Line 565...
557
	strcpy (charset, "ISO-8859-1");
565
	strcpy (charset, "ISO-8859-1");
558
#endif
566
#endif
559
      else
567
      else
560
	/* Invalid string */
568
	/* Invalid string */
561
      	FAIL;
569
      	FAIL;
-
 
570
    }
562
      if (c[0] == '@')
571
  if (c && c[0] == '@')
563
	{
572
	{
564
	  /* Modifier */
573
	  /* Modifier */
565
	  /* Only one modifier is recognized right now.  "cjknarrow" is used
574
	  /* Only one modifier is recognized right now.  "cjknarrow" is used
566
	     to modify the behaviour of wcwidth() for East Asian languages.
575
	     to modify the behaviour of wcwidth() for East Asian languages.
567
	     For details see the comment at the end of this function. */
576
	     For details see the comment at the end of this function. */
568
	  if (!strcmp (c + 1, "cjknarrow"))
577
	  if (!strcmp (c + 1, "cjknarrow"))
569
	    cjknarrow = 1;
578
	    cjknarrow = 1;
570
	}
579
	}
571
    }
-
 
572
  /* We only support this subset of charsets. */
580
  /* We only support this subset of charsets. */
573
  switch (charset[0])
581
  switch (charset[0])
574
    {
582
    {
575
    case 'U':
583
    case 'U':
576
    case 'u':
584
    case 'u':
Line 843... Line 851...
843
      strcpy (lc_ctype_charset, charset);
851
      strcpy (lc_ctype_charset, charset);
844
      __mb_cur_max = mbc_max;
852
      __mb_cur_max = mbc_max;
845
      __wctomb = l_wctomb;
853
      __wctomb = l_wctomb;
846
      __mbtowc = l_mbtowc;
854
      __mbtowc = l_mbtowc;
847
      __set_ctype (charset);
855
      __set_ctype (charset);
848
      /* Check for the language part of the locale specifier.  In case
856
      /* Determine the width for the "CJK Ambiguous Width" category of
849
         of "ja", "ko", or "zh", assume the use of CJK fonts, unless the
857
         characters. This is used in wcwidth(). Assume single width for
850
	 "@cjknarrow" modifier has been specified.
858
         single-byte charsets, and double width for multi-byte charsets
851
	 The result is stored in lc_ctype_cjk_lang and tested in wcwidth()
859
         other than UTF-8. For UTF-8, use double width for the East Asian
852
	 to figure out the width to return (1 or 2) for the "CJK Ambiguous
860
         languages ("ja", "ko", "zh"), and single width for everything else.
853
	 Width" category of characters. */
861
         Single width can also be forced with the "@cjknarrow" modifier. */
854
      lc_ctype_cjk_lang = !cjknarrow
862
      lc_ctype_cjk_lang = !cjknarrow
-
 
863
			  && mbc_max > 1
-
 
864
			  && (charset[0] != 'U'
855
			  && ((strncmp (locale, "ja", 2) == 0
865
			      || strncmp (locale, "ja", 2) == 0
856
			      || strncmp (locale, "ko", 2) == 0
866
			      || strncmp (locale, "ko", 2) == 0
857
			      || strncmp (locale, "zh", 2) == 0));
867
			      || strncmp (locale, "zh", 2) == 0);
858
#ifdef __HAVE_LOCALE_INFO__
868
#ifdef __HAVE_LOCALE_INFO__
859
      ret = __ctype_load_locale (locale, (void *) l_wctomb, charset, mbc_max);
869
      ret = __ctype_load_locale (locale, (void *) l_wctomb, charset, mbc_max);
860
#endif /* __HAVE_LOCALE_INFO__ */
870
#endif /* __HAVE_LOCALE_INFO__ */
861
      break;
871
      break;
862
    case LC_MESSAGES:
872
    case LC_MESSAGES:
Line 940... Line 950...
940
 
950
 
941
char *
951
char *
942
_DEFUN_VOID(__locale_msgcharset)
952
_DEFUN_VOID(__locale_msgcharset)
943
{
953
{
944
#ifdef __HAVE_LOCALE_INFO__
954
#ifdef __HAVE_LOCALE_INFO__
945
  return __get_current_messages_locale ()->codeset;
955
  return (char *) __get_current_messages_locale ()->codeset;
946
#else
956
#else
947
  return lc_message_charset;
957
  return lc_message_charset;
948
#endif
958
#endif
Line 960... Line 970...
960
{
970
{
961
#ifdef __HAVE_LOCALE_INFO__
971
#ifdef __HAVE_LOCALE_INFO__
962
  if (__nlocale_changed)
972
  if (__nlocale_changed)
963
    {
973
    {
964
      struct lc_numeric_T *n = __get_current_numeric_locale ();
974
      struct lc_numeric_T *n = __get_current_numeric_locale ();
965
      lconv.decimal_point = n->decimal_point;
975
      lconv.decimal_point = (char *) n->decimal_point;
966
      lconv.thousands_sep = n->thousands_sep;
976
      lconv.thousands_sep = (char *) n->thousands_sep;
967
      lconv.grouping = n->grouping;
977
      lconv.grouping = (char *) n->grouping;
968
      __nlocale_changed = 0;
978
      __nlocale_changed = 0;
969
    }
979
    }
970
  if (__mlocale_changed)
980
  if (__mlocale_changed)
971
    {
981
    {
972
      struct lc_monetary_T *m = __get_current_monetary_locale ();
982
      struct lc_monetary_T *m = __get_current_monetary_locale ();
973
      lconv.int_curr_symbol = m->int_curr_symbol;
983
      lconv.int_curr_symbol = (char *) m->int_curr_symbol;
974
      lconv.currency_symbol = m->currency_symbol;
984
      lconv.currency_symbol = (char *) m->currency_symbol;
975
      lconv.mon_decimal_point = m->mon_decimal_point;
985
      lconv.mon_decimal_point = (char *) m->mon_decimal_point;
976
      lconv.mon_thousands_sep = m->mon_thousands_sep;
986
      lconv.mon_thousands_sep = (char *) m->mon_thousands_sep;
977
      lconv.mon_grouping = m->mon_grouping;
987
      lconv.mon_grouping = (char *) m->mon_grouping;
978
      lconv.positive_sign = m->positive_sign;
988
      lconv.positive_sign = (char *) m->positive_sign;
979
      lconv.negative_sign = m->negative_sign;
989
      lconv.negative_sign = (char *) m->negative_sign;
980
      lconv.int_frac_digits = m->int_frac_digits[0];
990
      lconv.int_frac_digits = m->int_frac_digits[0];
981
      lconv.frac_digits = m->frac_digits[0];
991
      lconv.frac_digits = m->frac_digits[0];
982
      lconv.p_cs_precedes = m->p_cs_precedes[0];
992
      lconv.p_cs_precedes = m->p_cs_precedes[0];
983
      lconv.p_sep_by_space = m->p_sep_by_space[0];
993
      lconv.p_sep_by_space = m->p_sep_by_space[0];
984
      lconv.n_cs_precedes = m->n_cs_precedes[0];
994
      lconv.n_cs_precedes = m->n_cs_precedes[0];