WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/newlib/libc/locale/locale.c

Rev	Author	Line No.	Line
4349	Serge	1	/*
		2	FUNCTION
		3	<<setlocale>>, <<localeconv>>---select or query locale
		4
		5	INDEX
		6	setlocale
		7	INDEX
		8	localeconv
		9	INDEX
		10	_setlocale_r
		11	INDEX
		12	_localeconv_r
		13
		14	ANSI_SYNOPSIS
		15	#include <locale.h>
		16	char *setlocale(int <[category]>, const char *<[locale]>);
		17	lconv *localeconv(void);
		18
		19	char *_setlocale_r(void *<[reent]>,
		20	int <[category]>, const char *<[locale]>);
		21	lconv *_localeconv_r(void *<[reent]>);
		22
		23	TRAD_SYNOPSIS
		24	#include <locale.h>
		25	char *setlocale(<[category]>, <[locale]>)
		26	int <[category]>;
		27	char *<[locale]>;
		28
		29	lconv *localeconv();
		30
		31	char *_setlocale_r(<[reent]>, <[category]>, <[locale]>)
		32	char *<[reent]>;
		33	int <[category]>;
		34	char *<[locale]>;
		35
		36	lconv *_localeconv_r(<[reent]>);
		37	char *<[reent]>;
		38
		39	DESCRIPTION
		40	<<setlocale>> is the facility defined by ANSI C to condition the
		41	execution environment for international collating and formatting
		42	information; <<localeconv>> reports on the settings of the current
		43	locale.
		44
		45	This is a minimal implementation, supporting only the required <<"POSIX">>
		46	and <<"C">> values for <[locale]>; strings representing other locales are not
		47	honored unless _MB_CAPABLE is defined.
		48
		49	If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
		50	the form
		51
		52	language[_TERRITORY][.charset][@@modifier]
		53
		54	<<"language">> is a two character string per ISO 639, or, if not available
		55	for a given language, a three character string per ISO 639-3.
		56	<<"TERRITORY">> is a country code per ISO 3166. For <<"charset">> and
		57	<<"modifier">> see below.
		58
		59	Additionally to the POSIX specifier, the following extension is supported
		60	for backward compatibility with older implementations using newlib:
		61	<<"C-charset">>.
		62	Instead of <<"C-">>, you can also specify <<"C.">>. Both variations allow
		63	to specify language neutral locales while using other charsets than ASCII,
		64	for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
		65	but uses the UTF-8 charset.
		66
		67	The following charsets are recognized:
		68	<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
		69	<<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with
		70	1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855,
		71	857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
		72	1257, 1258].
		73
		74	Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">>
		75	are equivalent. Charset names with dashes can also be written without
		76	dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>. <<"EUCJP">> and
		77	<<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
		78
		79	Full support for all of the above charsets requires that newlib has been
		80	built with multibyte support and support for all ISO and Windows Codepage.
		81	Otherwise all singlebyte charsets are simply mapped to ASCII. Right now,
		82	only newlib for Cygwin is built with full charset support by default.
		83	Under Cygwin, this implementation additionally supports the charsets
		84	<<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and <<"Big5">>. Cygwin
		85	does not support <<"JIS">>.
		86
		87	Cygwin additionally supports locales from the file
		88	/usr/share/locale/locale.alias.
		89
		90	(<<"">> is also accepted; if given, the settings are read from the
6099	serge	91	corresponding LC_* environment variables and $LANG according to POSIX rules.)
4349	Serge	92
		93	This implementation also supports the modifier <<"cjknarrow">>, which
		94	affects how the functions <<wcwidth>> and <<wcswidth>> handle characters
		95	from the "CJK Ambiguous Width" category of characters described at
		96	http://www.unicode.org/reports/tr11/#Ambiguous. These characters have a width
		97	of 1 for singlebyte charsets and a width of 2 for multibyte charsets
		98	other than UTF-8. For UTF-8, their width depends on the language specifier:
		99	it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean),
		100	and 1 for everything else. Specifying <<"cjknarrow">> forces a width of 1,
		101	independent of charset and language.
		102
		103	If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
		104	pointer to the string representing the current locale. The acceptable
		105	values for <[category]> are defined in `<<locale.h>>' as macros
		106	beginning with <<"LC_">>.
		107
		108	<<localeconv>> returns a pointer to a structure (also defined in
		109	`<<locale.h>>') describing the locale-specific conventions currently
		110	in effect.
		111
		112	<<_localeconv_r>> and <<_setlocale_r>> are reentrant versions of
		113	<<localeconv>> and <<setlocale>> respectively. The extra argument
		114	<[reent]> is a pointer to a reentrancy structure.
		115
		116	RETURNS
		117	A successful call to <<setlocale>> returns a pointer to a string
		118	associated with the specified category for the new locale. The string
		119	returned by <<setlocale>> is such that a subsequent call using that
		120	string will restore that category (or all categories in case of LC_ALL),
		121	to that state. The application shall not modify the string returned
		122	which may be overwritten by a subsequent call to <<setlocale>>.
		123	On error, <<setlocale>> returns <<NULL>>.
		124
		125	<<localeconv>> returns a pointer to a structure of type <<lconv>>,
		126	which describes the formatting and collating conventions in effect (in
		127	this implementation, always those of the C locale).
		128
		129	PORTABILITY
		130	ANSI C requires <<setlocale>>, but the only locale required across all
		131	implementations is the C locale.
		132
		133	NOTES
		134	There is no ISO-8859-12 codepage. It's also refused by this implementation.
		135
		136	No supporting OS subroutines are required.
		137	*/
		138
		139	/* Parts of this code are originally taken from FreeBSD. */
		140	/*
		141	* Copyright (c) 1996 - 2002 FreeBSD Project
		142	* Copyright (c) 1991, 1993
		143	* The Regents of the University of California. All rights reserved.
		144	*
		145	* This code is derived from software contributed to Berkeley by
		146	* Paul Borman at Krystal Technologies.
		147	*
		148	* Redistribution and use in source and binary forms, with or without
		149	* modification, are permitted provided that the following conditions
		150	* are met:
		151	* 1. Redistributions of source code must retain the above copyright
		152	* notice, this list of conditions and the following disclaimer.
		153	* 2. Redistributions in binary form must reproduce the above copyright
		154	* notice, this list of conditions and the following disclaimer in the
		155	* documentation and/or other materials provided with the distribution.
		156	* 4. Neither the name of the University nor the names of its contributors
		157	* may be used to endorse or promote products derived from this software
		158	* without specific prior written permission.
		159	*
		160	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
		161	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
		162	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
		163	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
		164	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
		165	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
		166	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
		167	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
		168	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
		169	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
		170	* SUCH DAMAGE.
		171	*/
		172
		#include <newlib.h>
		#include <errno.h>
		#include <locale.h>
		#include <string.h>
		#include <limits.h>
		#include <reent.h>
		#include <stdlib.h>
		#include <wchar.h>
		#include "lmessages.h"
		#include "lmonetary.h"
		#include "lnumeric.h"
		#include "lctype.h"
		#include "timelocal.h"
		#include "../stdlib/local.h"
		187
		188	#define _LC_LAST 7
		189	#define ENCODING_LEN 31
		190
4921	Serge	191	#ifdef __CYGWIN__ /* Cygwin starts with LC_CTYPE set to "C.UTF-8". */
		192	int __EXPORT __mb_cur_max = 6;
		193	#else
4349	Serge	194	int __EXPORT __mb_cur_max = 1;
4921	Serge	195	#endif
4349	Serge	196
		197	int __nlocale_changed = 0;
		198	int __mlocale_changed = 0;
		199	char *_PathLocale = NULL;
		200
		/* Formatting conventions handed out by _localeconv_r().  The initial
		   contents are: "." for the first string member, empty strings for the
		   other nine string members, and CHAR_MAX for all fourteen char members.  */
		static struct lconv lconv =
		{
		  /* ten string members */
		  ".",
		  "", "", "",
		  "", "", "",
		  "", "", "",
		  /* fourteen char members */
		  CHAR_MAX, CHAR_MAX,
		  CHAR_MAX, CHAR_MAX,
		  CHAR_MAX, CHAR_MAX,
		  CHAR_MAX, CHAR_MAX,
		  CHAR_MAX, CHAR_MAX,
		  CHAR_MAX, CHAR_MAX,
		  CHAR_MAX, CHAR_MAX
		};
		210
		#ifdef _MB_CAPABLE
		/*
		 * Category names for getenv().  Indexed by category number; entry 0 is
		 * the LC_ALL name.
		 */
		static char *categories[_LC_LAST] = {
		  "LC_ALL",
		  "LC_COLLATE",
		  "LC_CTYPE",
		  "LC_MONETARY",
		  "LC_NUMERIC",
		  "LC_TIME",
		  "LC_MESSAGES",
		};

		/*
		 * Default locale per POSIX.  Can be overridden on a per-target base.
		 */
		#ifndef DEFAULT_LOCALE
		#define DEFAULT_LOCALE "C"
		#endif
		/*
		 * This variable can be changed by any outside mechanism.  This allows,
		 * for instance, to load the default locale from a file.
		 */
		char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;

		/*
		 * Current locales for each category (slot 0 is not used for storage;
		 * the LC_ALL string is assembled on demand by currentlocale()).
		 */
		static char current_categories[_LC_LAST][ENCODING_LEN + 1] = {
		  "C",
		  "C",
		#ifdef __CYGWIN__ /* Cygwin starts with LC_CTYPE set to "C.UTF-8". */
		  "C.UTF-8",
		#else
		  "C",
		#endif
		  "C",
		  "C",
		  "C",
		  "C",
		};

		/*
		 * The locales we are going to try and load, and a backup of the
		 * previous ones used to roll back a failed LC_ALL change.
		 */
		static char new_categories[_LC_LAST][ENCODING_LEN + 1];
		static char saved_categories[_LC_LAST][ENCODING_LEN + 1];

		/* Buffer for the composite LC_ALL string: one name plus one "/"
		   separator plus one spare byte per category slot.  */
		static char current_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)];
		static char *currentlocale(void);
		static char *loadlocale(struct _reent *, int);
		static const char *__get_locale_env(struct _reent *, int);

		#endif /* _MB_CAPABLE */
		266
		267	#ifdef __CYGWIN__
		268	static char lc_ctype_charset[ENCODING_LEN + 1] = "UTF-8";
		269	#else
		270	static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII";
		271	#endif
		272	static char lc_message_charset[ENCODING_LEN + 1] = "ASCII";
		273	static int lc_ctype_cjk_lang = 0;
		274
		275	char *
		276	_DEFUN(_setlocale_r, (p, category, locale),
		277	struct _reent *p _AND
		278	int category _AND
		279	_CONST char *locale)
		280	{
		281	#ifndef _MB_CAPABLE
		282	if (locale)
		283	{
		284	if (strcmp (locale, "POSIX") && strcmp (locale, "C")
		285	&& strcmp (locale, ""))
		286	return NULL;
		287	}
		288	return "C";
		289	#else /* !_MB_CAPABLE */
		290	int i, j, len, saverr;
		291	const char env, r;
		292
		293	if (category < LC_ALL \|\| category >= _LC_LAST)
		294	{
		295	p->_errno = EINVAL;
		296	return NULL;
		297	}
		298
		299	if (locale == NULL)
		300	return category != LC_ALL ? current_categories[category] : currentlocale();
		301
		302	/*
		303	* Default to the current locale for everything.
		304	*/
		305	for (i = 1; i < _LC_LAST; ++i)
		306	strcpy (new_categories[i], current_categories[i]);
		307
		308	/*
		309	* Now go fill up new_categories from the locale argument
		310	*/
		311	if (!*locale)
		312	{
		313	if (category == LC_ALL)
		314	{
		315	for (i = 1; i < _LC_LAST; ++i)
		316	{
		317	env = __get_locale_env (p, i);
		318	if (strlen (env) > ENCODING_LEN)
		319	{
		320	p->_errno = EINVAL;
		321	return NULL;
		322	}
		323	strcpy (new_categories[i], env);
		324	}
		325	}
		326	else
		327	{
		328	env = __get_locale_env (p, category);
		329	if (strlen (env) > ENCODING_LEN)
		330	{
		331	p->_errno = EINVAL;
		332	return NULL;
		333	}
		334	strcpy (new_categories[category], env);
		335	}
		336	}
		337	else if (category != LC_ALL)
		338	{
		339	if (strlen (locale) > ENCODING_LEN)
		340	{
		341	p->_errno = EINVAL;
		342	return NULL;
		343	}
		344	strcpy (new_categories[category], locale);
		345	}
		346	else
		347	{
		348	if ((r = strchr (locale, '/')) == NULL)
		349	{
		350	if (strlen (locale) > ENCODING_LEN)
		351	{
		352	p->_errno = EINVAL;
		353	return NULL;
		354	}
		355	for (i = 1; i < _LC_LAST; ++i)
		356	strcpy (new_categories[i], locale);
		357	}
		358	else
		359	{
		360	for (i = 1; r[1] == '/'; ++r)
		361	;
		362	if (!r[1])
		363	{
		364	p->_errno = EINVAL;
		365	return NULL; /* Hmm, just slashes... */
		366	}
		367	do
		368	{
		369	if (i == _LC_LAST)
		370	break; /* Too many slashes... */
		371	if ((len = r - locale) > ENCODING_LEN)
		372	{
		373	p->_errno = EINVAL;
		374	return NULL;
		375	}
		376	strlcpy (new_categories[i], locale, len + 1);
		377	i++;
		378	while (*r == '/')
		379	r++;
		380	locale = r;
		381	while (r && r != '/')
		382	r++;
		383	}
		384	while (*locale);
		385	while (i < _LC_LAST)
		386	{
		387	strcpy (new_categories[i], new_categories[i-1]);
		388	i++;
		389	}
		390	}
		391	}
		392
		393	if (category != LC_ALL)
		394	return loadlocale (p, category);
		395
		396	for (i = 1; i < _LC_LAST; ++i)
		397	{
		398	strcpy (saved_categories[i], current_categories[i]);
		399	if (loadlocale (p, i) == NULL)
		400	{
		401	saverr = p->_errno;
		402	for (j = 1; j < i; j++)
		403	{
		404	strcpy (new_categories[j], saved_categories[j]);
		405	if (loadlocale (p, j) == NULL)
		406	{
		407	strcpy (new_categories[j], "C");
		408	loadlocale (p, j);
		409	}
		410	}
		411	p->_errno = saverr;
		412	return NULL;
		413	}
		414	}
		415	return currentlocale ();
		416	#endif /* !_MB_CAPABLE */
		417	}
		418
		#ifdef _MB_CAPABLE
		/*
		 * Build the string describing all categories in current_locale_string
		 * and return it.  If every category carries the same name, that name
		 * alone is the result; otherwise the names of categories 1 to
		 * _LC_LAST - 1 are joined with "/".
		 */
		static char *
		currentlocale()
		{
		  int idx;
		  int mixed = 0;

		  strcpy (current_locale_string, current_categories[1]);

		  /* Does any category differ from the first one?  */
		  for (idx = 2; idx < _LC_LAST && !mixed; ++idx)
		    mixed = strcmp (current_categories[1], current_categories[idx]) != 0;

		  if (mixed)
		    {
		      for (idx = 2; idx < _LC_LAST; ++idx)
		        {
		          strcat (current_locale_string, "/");
		          strcat (current_locale_string, current_categories[idx]);
		        }
		    }

		  return current_locale_string;
		}
		#endif /* _MB_CAPABLE */
		439
		440	#ifdef _MB_CAPABLE
		#ifdef __CYGWIN__
		/* Cygwin-only helpers used by loadlocale(); implemented outside this
		   file.  */
		extern void __set_charset_from_locale (const char *locale, char *charset);
		extern char *__set_locale_from_locale_alias (const char *, char *);
		extern int __collate_load_locale (const char *, void *, const char *);
		#endif /* __CYGWIN__ */

		/* Called by loadlocale() with the canonical charset name whenever
		   LC_CTYPE changes.  */
		extern void __set_ctype (const char *charset);
		448
		449	static char *
		450	loadlocale(struct _reent *p, int category)
		451	{
		452	/* At this point a full-featured system would just load the locale
		453	specific data from the locale files.
		454	What we do here for now is to check the incoming string for correctness.
		455	The string must be in one of the allowed locale strings, either
		456	one in POSIX-style, or one in the old newlib style to maintain
		457	backward compatibility. If the local string is correct, the charset
		458	is extracted and stored in lc_ctype_charset or lc_message_charset
		459	dependent on the cateogry. */
		460	char *locale = NULL;
		461	char charset[ENCODING_LEN + 1];
		462	unsigned long val;
		463	char end, c = NULL;
		464	int mbc_max;
		465	int (l_wctomb) (struct _reent , char , wchar_t, const char , mbstate_t *);
		466	int (l_mbtowc) (struct _reent , wchar_t , const char , size_t,
		467	const char , mbstate_t );
		468	int cjknarrow = 0;
		469
		470	/* Avoid doing everything twice if nothing has changed. */
		471	if (!strcmp (new_categories[category], current_categories[category]))
		472	return current_categories[category];
		473
		474	#ifdef __CYGWIN__
		475	/* This additional code handles the case that the incoming locale string
		476	is not valid. If so, it calls the function __set_locale_from_locale_alias,
		477	which is only available on Cygwin right now. The function reads the
		478	file /usr/share/locale/locale.alias. The file contains locale aliases
		479	and their replacement locale. For instance, the alias "french" is
		480	translated to "fr_FR.ISO-8859-1", the alias "thai" is translated to
		481	"th_TH.TIS-620". If successful, the function returns with a pointer
		482	to the second argument, which is a buffer in which the replacement locale
		483	gets stored. Otherwise the function returns NULL. */
		484	char tmp_locale[ENCODING_LEN + 1];
		485	int ret = 0;
		486
		487	restart:
		488	if (!locale)
		489	locale = new_categories[category];
		490	else if (locale != tmp_locale)
		491	{
		492	locale = __set_locale_from_locale_alias (locale, tmp_locale);
		493	if (!locale)
		494	return NULL;
		495	}
		496	# define FAIL goto restart
		497	#else
		498	locale = new_categories[category];
		499	# define FAIL return NULL
		500	#endif
6099	serge	501
4349	Serge	502	/* "POSIX" is translated to "C", as on Linux. */
		503	if (!strcmp (locale, "POSIX"))
		504	strcpy (locale, "C");
		505	if (!strcmp (locale, "C")) /* Default "C" locale */
		506	strcpy (charset, "ASCII");
		507	else if (locale[0] == 'C'
		508	&& (locale[1] == '-' /* Old newlib style */
		509	\|\| locale[1] == '.')) /* Extension for the C locale to allow
		510	specifying different charsets while
		511	sticking to the C locale in terms
		512	of sort order, etc. Proposed in
		513	the Debian project. */
		514	{
		515	char *chp;
		516
		517	c = locale + 2;
		518	strcpy (charset, c);
		519	if ((chp = strchr (charset, '@')))
		520	/* Strip off modifier */
		521	*chp = '\0';
		522	c += strlen (charset);
		523	}
		524	else /* POSIX style */
		525	{
		526	c = locale;
		527
		528	/* Don't use ctype macros here, they might be localized. */
		529	/* Language */
		530	if (c[0] < 'a' \|\| c[0] > 'z'
		531	\|\| c[1] < 'a' \|\| c[1] > 'z')
		532	FAIL;
		533	c += 2;
		534	/* Allow three character Language per ISO 639-3 */
		535	if (c[0] >= 'a' && c[0] <= 'z')
		536	++c;
		537	if (c[0] == '_')
		538	{
		539	/* Territory */
		540	++c;
		541	if (c[0] < 'A' \|\| c[0] > 'Z'
		542	\|\| c[1] < 'A' \|\| c[1] > 'Z')
		543	FAIL;
		544	c += 2;
		545	}
		546	if (c[0] == '.')
		547	{
		548	/* Charset */
		549	char *chp;
		550
		551	++c;
		552	strcpy (charset, c);
		553	if ((chp = strchr (charset, '@')))
		554	/* Strip off modifier */
		555	*chp = '\0';
		556	c += strlen (charset);
		557	}
		558	else if (c[0] == '\0' \|\| c[0] == '@')
		559	/* End of string or just a modifier */
		560	#ifdef __CYGWIN__
		561	/* The Cygwin-only function __set_charset_from_locale checks
		562	for the default charset which is connected to the given locale.
		563	The function uses Windows functions in turn so it can't be easily
		564	adapted to other targets. However, if any other target provides
		565	equivalent functionality, preferrably using the same function name
		566	it would be sufficient to change the guarding #ifdef. */
		567	__set_charset_from_locale (locale, charset);
		568	#else
		569	strcpy (charset, "ISO-8859-1");
		570	#endif
		571	else
		572	/* Invalid string */
		573	FAIL;
		574	}
		575	if (c && c[0] == '@')
6099	serge	576	{
		577	/* Modifier */
		578	/* Only one modifier is recognized right now. "cjknarrow" is used
		579	to modify the behaviour of wcwidth() for East Asian languages.
		580	For details see the comment at the end of this function. */
		581	if (!strcmp (c + 1, "cjknarrow"))
		582	cjknarrow = 1;
		583	}
4349	Serge	584	/* We only support this subset of charsets. */
		585	switch (charset[0])
		586	{
		587	case 'U':
		588	case 'u':
		589	if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
		590	FAIL;
		591	strcpy (charset, "UTF-8");
		592	mbc_max = 6;
		593	l_wctomb = __utf8_wctomb;
		594	l_mbtowc = __utf8_mbtowc;
		595	break;
		596	#ifndef __CYGWIN__
		597	/* Cygwin does not support JIS at all. */
		598	case 'J':
		599	case 'j':
		600	if (strcasecmp (charset, "JIS"))
		601	FAIL;
		602	strcpy (charset, "JIS");
		603	mbc_max = 8;
		604	l_wctomb = __jis_wctomb;
		605	l_mbtowc = __jis_mbtowc;
		606	break;
		607	#endif /* !__CYGWIN__ */
		608	case 'E':
		609	case 'e':
		610	if (strncasecmp (charset, "EUC", 3))
		611	FAIL;
		612	c = charset + 3;
		613	if (*c == '-')
		614	++c;
		615	if (!strcasecmp (c, "JP"))
		616	{
		617	strcpy (charset, "EUCJP");
		618	mbc_max = 3;
		619	l_wctomb = __eucjp_wctomb;
		620	l_mbtowc = __eucjp_mbtowc;
		621	}
		622	#ifdef __CYGWIN__
		623	/* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's
		624	implementation requires Windows support. */
		625	else if (!strcasecmp (c, "KR"))
		626	{
		627	strcpy (charset, "EUCKR");
		628	mbc_max = 2;
		629	l_wctomb = __kr_wctomb;
		630	l_mbtowc = __kr_mbtowc;
		631	}
		632	else if (!strcasecmp (c, "CN"))
		633	{
		634	strcpy (charset, "EUCCN");
		635	mbc_max = 2;
		636	l_wctomb = __gbk_wctomb;
		637	l_mbtowc = __gbk_mbtowc;
		638	}
		639	#endif /* __CYGWIN__ */
		640	else
		641	FAIL;
		642	break;
		643	case 'S':
		644	case 's':
		645	if (strcasecmp (charset, "SJIS"))
		646	FAIL;
		647	strcpy (charset, "SJIS");
		648	mbc_max = 2;
		649	l_wctomb = __sjis_wctomb;
		650	l_mbtowc = __sjis_mbtowc;
		651	break;
		652	case 'I':
		653	case 'i':
		654	/* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
		655	ISO-8859-12. This code also recognizes the aliases without dashes. */
		656	if (strncasecmp (charset, "ISO", 3))
		657	FAIL;
		658	c = charset + 3;
		659	if (*c == '-')
		660	++c;
		661	if (strncasecmp (c, "8859", 4))
		662	FAIL;
		663	c += 4;
		664	if (*c == '-')
		665	++c;
		666	val = _strtol_r (p, c, &end, 10);
		667	if (val < 1 \|\| val > 16 \|\| val == 12 \|\| *end)
		668	FAIL;
		669	strcpy (charset, "ISO-8859-");
		670	c = charset + 9;
		671	if (val > 10)
		672	*c++ = '1';
		673	*c++ = val % 10 + '0';
		674	*c = '\0';
		675	mbc_max = 1;
		676	#ifdef _MB_EXTENDED_CHARSETS_ISO
		677	l_wctomb = __iso_wctomb;
		678	l_mbtowc = __iso_mbtowc;
		679	#else /* !_MB_EXTENDED_CHARSETS_ISO */
		680	l_wctomb = __ascii_wctomb;
		681	l_mbtowc = __ascii_mbtowc;
		682	#endif /* _MB_EXTENDED_CHARSETS_ISO */
		683	break;
		684	case 'C':
		685	case 'c':
		686	if (charset[1] != 'P' && charset[1] != 'p')
		687	FAIL;
		688	strncpy (charset, "CP", 2);
		689	val = _strtol_r (p, charset + 2, &end, 10);
		690	if (*end)
		691	FAIL;
		692	switch (val)
		693	{
		694	case 437:
		695	case 720:
		696	case 737:
		697	case 775:
		698	case 850:
		699	case 852:
		700	case 855:
		701	case 857:
		702	case 858:
		703	case 862:
		704	case 866:
		705	case 874:
		706	case 1125:
		707	case 1250:
		708	case 1251:
		709	case 1252:
		710	case 1253:
		711	case 1254:
		712	case 1255:
		713	case 1256:
		714	case 1257:
		715	case 1258:
		716	mbc_max = 1;
		717	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
		718	l_wctomb = __cp_wctomb;
		719	l_mbtowc = __cp_mbtowc;
		720	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
		721	l_wctomb = __ascii_wctomb;
		722	l_mbtowc = __ascii_mbtowc;
		723	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
		724	break;
		725	case 932:
		726	mbc_max = 2;
		727	l_wctomb = __sjis_wctomb;
		728	l_mbtowc = __sjis_mbtowc;
		729	break;
		730	default:
		731	FAIL;
		732	}
		733	break;
		734	case 'K':
		735	case 'k':
		736	/* KOI8-R, KOI8-U and the aliases without dash */
		737	if (strncasecmp (charset, "KOI8", 4))
		738	FAIL;
		739	c = charset + 4;
		740	if (*c == '-')
		741	++c;
		742	if (c == 'R' \|\| c == 'r')
		743	strcpy (charset, "CP20866");
		744	else if (c == 'U' \|\| c == 'u')
		745	strcpy (charset, "CP21866");
		746	else
		747	FAIL;
		748	mbc_max = 1;
		749	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
		750	l_wctomb = __cp_wctomb;
		751	l_mbtowc = __cp_mbtowc;
		752	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
		753	l_wctomb = __ascii_wctomb;
		754	l_mbtowc = __ascii_mbtowc;
		755	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
		756	break;
		757	case 'A':
		758	case 'a':
		759	if (strcasecmp (charset, "ASCII"))
		760	FAIL;
		761	strcpy (charset, "ASCII");
		762	mbc_max = 1;
		763	l_wctomb = __ascii_wctomb;
		764	l_mbtowc = __ascii_mbtowc;
		765	break;
		766	case 'G':
		767	case 'g':
		768	#ifdef __CYGWIN__
		769	/* Newlib does not provide GBK/GB2312 and Cygwin's implementation
		770	requires Windows support. */
		771	if (!strcasecmp (charset, "GBK")
		772	\|\| !strcasecmp (charset, "GB2312"))
		773	{
		774	strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
6099	serge	775	mbc_max = 2;
		776	l_wctomb = __gbk_wctomb;
		777	l_mbtowc = __gbk_mbtowc;
4349	Serge	778	}
		779	else
		780	#endif /* __CYGWIN__ */
		781	/* GEORGIAN-PS and the alias without dash */
		782	if (!strncasecmp (charset, "GEORGIAN", 8))
		783	{
		784	c = charset + 8;
		785	if (*c == '-')
		786	++c;
		787	if (strcasecmp (c, "PS"))
		788	FAIL;
		789	strcpy (charset, "CP101");
		790	mbc_max = 1;
		791	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
		792	l_wctomb = __cp_wctomb;
		793	l_mbtowc = __cp_mbtowc;
		794	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
		795	l_wctomb = __ascii_wctomb;
		796	l_mbtowc = __ascii_mbtowc;
		797	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
		798	}
		799	else
		800	FAIL;
		801	break;
		802	case 'P':
		803	case 'p':
		804	/* PT154 */
		805	if (strcasecmp (charset, "PT154"))
		806	FAIL;
		807	strcpy (charset, "CP102");
		808	mbc_max = 1;
		809	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
		810	l_wctomb = __cp_wctomb;
		811	l_mbtowc = __cp_mbtowc;
		812	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
		813	l_wctomb = __ascii_wctomb;
		814	l_mbtowc = __ascii_mbtowc;
		815	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
		816	break;
		817	case 'T':
		818	case 't':
		819	if (strncasecmp (charset, "TIS", 3))
		820	FAIL;
		821	c = charset + 3;
		822	if (*c == '-')
		823	++c;
		824	if (strcasecmp (c, "620"))
		825	FAIL;
		826	strcpy (charset, "CP874");
		827	mbc_max = 1;
		828	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
		829	l_wctomb = __cp_wctomb;
		830	l_mbtowc = __cp_mbtowc;
		831	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
		832	l_wctomb = __ascii_wctomb;
		833	l_mbtowc = __ascii_mbtowc;
		834	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
		835	break;
		836	#ifdef __CYGWIN__
		837	/* Newlib does not provide Big5 and Cygwin's implementation
		838	requires Windows support. */
		839	case 'B':
		840	case 'b':
		841	if (strcasecmp (charset, "BIG5"))
		842	FAIL;
		843	strcpy (charset, "BIG5");
		844	mbc_max = 2;
		845	l_wctomb = __big5_wctomb;
		846	l_mbtowc = __big5_mbtowc;
		847	break;
		848	#endif /* __CYGWIN__ */
		849	default:
		850	FAIL;
		851	}
		852	switch (category)
		853	{
		854	case LC_CTYPE:
		855	strcpy (lc_ctype_charset, charset);
		856	__mb_cur_max = mbc_max;
		857	__wctomb = l_wctomb;
		858	__mbtowc = l_mbtowc;
		859	__set_ctype (charset);
		860	/* Determine the width for the "CJK Ambiguous Width" category of
		861	characters. This is used in wcwidth(). Assume single width for
		862	single-byte charsets, and double width for multi-byte charsets
		863	other than UTF-8. For UTF-8, use double width for the East Asian
		864	languages ("ja", "ko", "zh"), and single width for everything else.
		865	Single width can also be forced with the "@cjknarrow" modifier. */
		866	lc_ctype_cjk_lang = !cjknarrow
		867	&& mbc_max > 1
		868	&& (charset[0] != 'U'
		869	\|\| strncmp (locale, "ja", 2) == 0
		870	\|\| strncmp (locale, "ko", 2) == 0
		871	\|\| strncmp (locale, "zh", 2) == 0);
		872	#ifdef __HAVE_LOCALE_INFO__
		873	ret = __ctype_load_locale (locale, (void *) l_wctomb, charset, mbc_max);
		874	#endif /* __HAVE_LOCALE_INFO__ */
		875	break;
		876	case LC_MESSAGES:
		877	strcpy (lc_message_charset, charset);
		878	#ifdef __HAVE_LOCALE_INFO__
		879	ret = __messages_load_locale (locale, (void *) l_wctomb, charset);
		880	if (!ret)
		881	#endif /* __HAVE_LOCALE_INFO__ */
		882	break;
		883	#ifdef __HAVE_LOCALE_INFO__
		884	#ifdef __CYGWIN__
		885	/* Right now only Cygwin supports a __collate_load_locale function at all. */
		886	case LC_COLLATE:
		887	ret = __collate_load_locale (locale, (void *) l_mbtowc, charset);
		888	break;
		889	#endif
		890	case LC_MONETARY:
		891	ret = __monetary_load_locale (locale, (void *) l_wctomb, charset);
		892	break;
		893	case LC_NUMERIC:
		894	ret = __numeric_load_locale (locale, (void *) l_wctomb, charset);
		895	break;
		896	case LC_TIME:
		897	ret = __time_load_locale (locale, (void *) l_wctomb, charset);
		898	break;
		899	#endif /* __HAVE_LOCALE_INFO__ */
		900	default:
		901	break;
		902	}
		903	#ifdef __HAVE_LOCALE_INFO__
		904	if (ret)
		905	FAIL;
		906	#endif /* __HAVE_LOCALE_INFO__ */
		907	return strcpy(current_categories[category], new_categories[category]);
		908	}
		909
		910	static const char *
		911	__get_locale_env(struct _reent *p, int category)
		912	{
		913	const char *env;
		914
		915	/* 1. check LC_ALL. */
		916	env = _getenv_r (p, categories[0]);
		917
		918	/* 2. check LC_* */
		919	if (env == NULL \|\| !*env)
		920	env = _getenv_r (p, categories[category]);
		921
		922	/* 3. check LANG */
		923	if (env == NULL \|\| !*env)
		924	env = _getenv_r (p, "LANG");
		925
		926	/* 4. if none is set, fall to default locale */
		927	if (env == NULL \|\| !*env)
		928	env = __default_locale;
		929
		930	return env;
		931	}
		932	#endif /* _MB_CAPABLE */
		933
		934	char *
		935	_DEFUN_VOID(__locale_charset)
		936	{
		937	#if 0//def __HAVE_LOCALE_INFO__
		938	return __get_current_ctype_locale ()->codeset;
		939	#else
		940	return lc_ctype_charset;
		941	#endif
		942	}
		943
		944	int
		945	_DEFUN_VOID(__locale_mb_cur_max)
		946	{
		947	#if 0//def __HAVE_LOCALE_INFO__
		948	return __get_current_ctype_locale ()->mb_cur_max[0];
		949	#else
		950	return __mb_cur_max;
		951	#endif
		952	}
		953
		954
		955	char *
		956	_DEFUN_VOID(__locale_msgcharset)
		957	{
		958	#ifdef __HAVE_LOCALE_INFO__
		959	return (char *) __get_current_messages_locale ()->codeset;
		960	#else
		961	return lc_message_charset;
		962	#endif
		963	}
		964
		965	int
		966	_DEFUN_VOID(__locale_cjk_lang)
		967	{
		968	return lc_ctype_cjk_lang;
		969	}
		970
		971	struct lconv *
		972	_DEFUN(_localeconv_r, (data),
		973	struct _reent *data)
		974	{
		975	#ifdef __HAVE_LOCALE_INFO__
		976	if (__nlocale_changed)
		977	{
		978	struct lc_numeric_T *n = __get_current_numeric_locale ();
		979	lconv.decimal_point = (char *) n->decimal_point;
		980	lconv.thousands_sep = (char *) n->thousands_sep;
		981	lconv.grouping = (char *) n->grouping;
		982	__nlocale_changed = 0;
		983	}
		984	if (__mlocale_changed)
		985	{
		986	struct lc_monetary_T *m = __get_current_monetary_locale ();
		987	lconv.int_curr_symbol = (char *) m->int_curr_symbol;
		988	lconv.currency_symbol = (char *) m->currency_symbol;
		989	lconv.mon_decimal_point = (char *) m->mon_decimal_point;
		990	lconv.mon_thousands_sep = (char *) m->mon_thousands_sep;
		991	lconv.mon_grouping = (char *) m->mon_grouping;
		992	lconv.positive_sign = (char *) m->positive_sign;
		993	lconv.negative_sign = (char *) m->negative_sign;
		994	lconv.int_frac_digits = m->int_frac_digits[0];
		995	lconv.frac_digits = m->frac_digits[0];
		996	lconv.p_cs_precedes = m->p_cs_precedes[0];
		997	lconv.p_sep_by_space = m->p_sep_by_space[0];
		998	lconv.n_cs_precedes = m->n_cs_precedes[0];
		999	lconv.n_sep_by_space = m->n_sep_by_space[0];
		1000	lconv.p_sign_posn = m->p_sign_posn[0];
		1001	lconv.n_sign_posn = m->n_sign_posn[0];
		1002	#ifdef __HAVE_LOCALE_INFO_EXTENDED__
		1003	lconv.int_p_cs_precedes = m->int_p_cs_precedes[0];
		1004	lconv.int_p_sep_by_space = m->int_p_sep_by_space[0];
		1005	lconv.int_n_cs_precedes = m->int_n_cs_precedes[0];
		1006	lconv.int_n_sep_by_space = m->int_n_sep_by_space[0];
		1007	lconv.int_n_sign_posn = m->int_n_sign_posn[0];
		1008	lconv.int_p_sign_posn = m->int_p_sign_posn[0];
		1009	#else /* !__HAVE_LOCALE_INFO_EXTENDED__ */
		1010	lconv.int_p_cs_precedes = m->p_cs_precedes[0];
		1011	lconv.int_p_sep_by_space = m->p_sep_by_space[0];
		1012	lconv.int_n_cs_precedes = m->n_cs_precedes[0];
		1013	lconv.int_n_sep_by_space = m->n_sep_by_space[0];
		1014	lconv.int_n_sign_posn = m->n_sign_posn[0];
		1015	lconv.int_p_sign_posn = m->p_sign_posn[0];
		1016	#endif /* !__HAVE_LOCALE_INFO_EXTENDED__ */
		1017	__mlocale_changed = 0;
		1018	}
		1019	#endif /* __HAVE_LOCALE_INFO__ */
		1020	return (struct lconv *) &lconv;
		1021	}
		1022
		1023	#ifndef _REENT_ONLY
		1024
		1025	#ifndef __CYGWIN__
		1026	/* Cygwin provides its own version of setlocale to perform some more
		1027	initialization work. It calls _setlocale_r, though. */
		1028	char *
		1029	_DEFUN(setlocale, (category, locale),
		1030	int category _AND
		1031	_CONST char *locale)
		1032	{
		1033	return _setlocale_r (_REENT, category, locale);
		1034	}
		1035	#endif /* __CYGWIN__ */
		1036
		1037	struct lconv *
		1038	_DEFUN_VOID(localeconv)
		1039	{
		1040	return _localeconv_r (_REENT);
		1041	}
		1042
		1043	#endif

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/newlib/libc/locale/locale.c – Rev 9865