WebSVN – Kolibri OS – Blame – /programs/develop/libraries/newlib/locale/locale.c

Rev	Author	Line No.	Line
1693	serge	1	/*
		2	FUNCTION
		3	<<setlocale>>, <<localeconv>>---select or query locale
		4
		5	INDEX
		6	setlocale
		7	INDEX
		8	localeconv
		9	INDEX
		10	_setlocale_r
		11	INDEX
		12	_localeconv_r
		13
		14	ANSI_SYNOPSIS
		15	#include <locale.h>
		16	char *setlocale(int <[category]>, const char *<[locale]>);
		17	lconv *localeconv(void);
		18
		19	char *_setlocale_r(void *<[reent]>,
		20	int <[category]>, const char *<[locale]>);
		21	lconv *_localeconv_r(void *<[reent]>);
		22
		23	TRAD_SYNOPSIS
		24	#include <locale.h>
		25	char *setlocale(<[category]>, <[locale]>)
		26	int <[category]>;
		27	char *<[locale]>;
		28
		29	lconv *localeconv();
		30
		31	char *_setlocale_r(<[reent]>, <[category]>, <[locale]>)
		32	char *<[reent]>;
		33	int <[category]>;
		34	char *<[locale]>;
		35
		36	lconv *_localeconv_r(<[reent]>);
		37	char *<[reent]>;
		38
		39	DESCRIPTION
		40	<<setlocale>> is the facility defined by ANSI C to condition the
		41	execution environment for international collating and formatting
		42	information; <<localeconv>> reports on the settings of the current
		43	locale.
		44
		45	This is a minimal implementation, supporting only the required <<"POSIX">>
		46	and <<"C">> values for <[locale]>; strings representing other locales are not
		47	honored unless _MB_CAPABLE is defined.
		48
		49	If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
		50	the form
		51
		52	language[_TERRITORY][.charset][@@modifier]
		53
		54	<<"language">> is a two character string per ISO 639, or, if not available
		55	for a given language, a three character string per ISO 639-3.
		56	<<"TERRITORY">> is a country code per ISO 3166. For <<"charset">> and
		57	<<"modifier">> see below.
		58
		59	Additionally to the POSIX specifier, the following extension is supported
		60	for backward compatibility with older implementations using newlib:
		61	<<"C-charset">>.
		62	Instead of <<"C-">>, you can also specify <<"C.">>. Both variations allow
		63	to specify language neutral locales while using other charsets than ASCII,
		64	for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
		65	but uses the UTF-8 charset.
		66
		67	The following charsets are recognized:
		68	<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
		69	<<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with
		70	1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855,
		71	857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
		72	1257, 1258].
		73
		74	Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">>
		75	are equivalent. Charset names with dashes can also be written without
		76	dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>. <<"EUCJP">> and
		77	<<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
		78
		79	Full support for all of the above charsets requires that newlib has been
		80	built with multibyte support and support for all ISO and Windows codepages.
		81	Otherwise all singlebyte charsets are simply mapped to ASCII. Right now,
		82	only newlib for Cygwin is built with full charset support by default.
		83	Under Cygwin, this implementation additionally supports the charsets
		84	<<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and <<"Big5">>. Cygwin
		85	does not support <<"JIS">>.
		86
		87	Cygwin additionally supports locales from the file
		88	/usr/share/locale/locale.alias.
		89
		90	(<<"">> is also accepted; if given, the settings are read from the
		91	corresponding LC_* environment variables and $LANG according to POSIX rules.)
		92
3065	serge	93	This implementation also supports the modifier <<"cjknarrow">>, which
		94	affects how the functions <<wcwidth>> and <<wcswidth>> handle characters
		95	from the "CJK Ambiguous Width" category of characters described at
		96	http://www.unicode.org/reports/tr11/#Ambiguous. These characters have a width
		97	of 1 for singlebyte charsets and a width of 2 for multibyte charsets
		98	other than UTF-8. For UTF-8, their width depends on the language specifier:
		99	it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean),
		100	and 1 for everything else. Specifying <<"cjknarrow">> forces a width of 1,
		101	independent of charset and language.
1693	serge	102
		103	If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
		104	pointer to the string representing the current locale. The acceptable
		105	values for <[category]> are defined in `<<locale.h>>' as macros
		106	beginning with <<"LC_">>.
		107
		108	<<localeconv>> returns a pointer to a structure (also defined in
		109	`<<locale.h>>') describing the locale-specific conventions currently
		110	in effect.
		111
		112	<<_localeconv_r>> and <<_setlocale_r>> are reentrant versions of
		113	<<localeconv>> and <<setlocale>> respectively. The extra argument
		114	<[reent]> is a pointer to a reentrancy structure.
		115
		116	RETURNS
		117	A successful call to <<setlocale>> returns a pointer to a string
		118	associated with the specified category for the new locale. The string
		119	returned by <<setlocale>> is such that a subsequent call using that
		120	string will restore that category (or all categories in case of LC_ALL),
		121	to that state. The application shall not modify the string returned
		122	which may be overwritten by a subsequent call to <<setlocale>>.
		123	On error, <<setlocale>> returns <<NULL>>.
		124
		125	<<localeconv>> returns a pointer to a structure of type <<lconv>>,
		126	which describes the formatting and collating conventions in effect (in
		127	this implementation, always those of the C locale).
		128
		129	PORTABILITY
		130	ANSI C requires <<setlocale>>, but the only locale required across all
		131	implementations is the C locale.
		132
		133	NOTES
		134	There is no ISO-8859-12 codepage. It's also refused by this implementation.
		135
		136	No supporting OS subroutines are required.
		137	*/
		138
		139	/* Parts of this code are originally taken from FreeBSD. */
		140	/*
		141	* Copyright (c) 1996 - 2002 FreeBSD Project
		142	* Copyright (c) 1991, 1993
		143	* The Regents of the University of California. All rights reserved.
		144	*
		145	* This code is derived from software contributed to Berkeley by
		146	* Paul Borman at Krystal Technologies.
		147	*
		148	* Redistribution and use in source and binary forms, with or without
		149	* modification, are permitted provided that the following conditions
		150	* are met:
		151	* 1. Redistributions of source code must retain the above copyright
		152	* notice, this list of conditions and the following disclaimer.
		153	* 2. Redistributions in binary form must reproduce the above copyright
		154	* notice, this list of conditions and the following disclaimer in the
		155	* documentation and/or other materials provided with the distribution.
		156	* 4. Neither the name of the University nor the names of its contributors
		157	* may be used to endorse or promote products derived from this software
		158	* without specific prior written permission.
		159	*
		160	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
		161	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
		162	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
		163	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
		164	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
		165	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
		166	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
		167	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
		168	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
		169	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
		170	* SUCH DAMAGE.
		171	*/
		172
		173	#include <newlib.h>
		174	#include <errno.h>
		175	#include <locale.h>
		176	#include <string.h>
		177	#include <limits.h>
		178	#include <reent.h>
		179	#include <stdlib.h>
		180	#include <wchar.h>
		181	#include "lmessages.h"
		182	#include "lmonetary.h"
		183	#include "lnumeric.h"
		184	#include "lctype.h"
3065	serge	185	#include "timelocal.h"
1693	serge	186	#include "../stdlib/local.h"
		187
			/* Number of category slots: index 0 is LC_ALL, 1 .. _LC_LAST - 1 are the
			   individual categories (see the loops in _setlocale_r). */
		188	#define _LC_LAST 7
			/* Longest locale or charset name accepted; buffers are ENCODING_LEN + 1. */
		189	#define ENCODING_LEN 31
		190
			/* Value reported by __locale_mb_cur_max(); loadlocale() overwrites it when
			   LC_CTYPE changes. */
		191	int __EXPORT __mb_cur_max = 1;
		192
			/* When non-zero, _localeconv_r() refreshes the numeric / monetary members of
			   lconv and then clears the flag.  Nothing in this file sets them. */
		193	int __nlocale_changed = 0;
		194	int __mlocale_changed = 0;
			/* Not referenced anywhere else in this file. */
		195	char *_PathLocale = NULL;
		196
			/* The structure handed out by _localeconv_r().  The initialiser is
			   positional: ten strings ("." first, the rest empty) followed by fourteen
			   CHAR_MAX values.
			   NOTE(review): presumably "." is decimal_point and this is the C locale
			   layout of struct lconv -- confirm against <locale.h>. */
		197	static
		198	struct lconv lconv =
		199	{
		200	".", "", "", "", "", "", "", "", "", "",
		201	CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
		202	CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
		203	CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
		204	CHAR_MAX, CHAR_MAX
		205	};
		206
		207	#ifdef _MB_CAPABLE
		208	/*
		209	 * Category names for getenv(), indexed by category number
			 * (slot 0 is LC_ALL).
		210	 */
		211	static char *categories[_LC_LAST] = {
		212	    "LC_ALL",
		213	    "LC_COLLATE",
		214	    "LC_CTYPE",
		215	    "LC_MONETARY",
		216	    "LC_NUMERIC",
		217	    "LC_TIME",
		218	    "LC_MESSAGES",
		219	};
		220
		221	/*
		222	 * Default locale per POSIX. Can be overridden on a per-target base.
		223	 */
		224	#ifndef DEFAULT_LOCALE
		225	#define DEFAULT_LOCALE "C"
		226	#endif
		227	/*
		228	 * This variable can be changed by any outside mechanism. This allows,
		229	 * for instance, to load the default locale from a file.
		230	 */
		231	char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;
		232
		233	/*
		234	 * Current locales for each category
		235	 */
		236	static char current_categories[_LC_LAST][ENCODING_LEN + 1] = {
		237	    "C",
		238	    "C",
3065	serge	239	#ifdef __CYGWIN__ /* Cygwin starts with LC_CTYPE set to "C.UTF-8". */
		240	    "C.UTF-8",
		241	#else
1693	serge	242	    "C",
3065	serge	243	#endif
1693	serge	244	    "C",
		245	    "C",
		246	    "C",
		247	    "C",
		248	};
		249
		250	/*
		251	 * The locales we are going to try and load
		252	 */
		253	static char new_categories[_LC_LAST][ENCODING_LEN + 1];
			/* Snapshot of current_categories taken while an LC_ALL change is applied,
			   so that _setlocale_r can roll back if one category fails to load. */
		254	static char saved_categories[_LC_LAST][ENCODING_LEN + 1];
		255
			/* Buffer for the string built by currentlocale(): one name per category,
			   each followed by one separator byte ('/' or the terminating NUL). */
		256	static char current_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)];
		257	static char *currentlocale(void);
		258	static char *loadlocale(struct _reent *, int);
		259	static const char *__get_locale_env(struct _reent *, int);
		260
		261	#endif /* _MB_CAPABLE */
		262
			/* Name of the charset in effect for LC_CTYPE; returned by
			   __locale_charset() and rewritten by loadlocale(). */
3065	serge	263	#ifdef __CYGWIN__
1693	serge	264	static char lc_ctype_charset[ENCODING_LEN + 1] = "UTF-8";
		265	#else
		266	static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII";
3065	serge	267	#endif
			/* Name of the charset in effect for LC_MESSAGES; returned by
			   __locale_msgcharset() when __HAVE_LOCALE_INFO__ is not defined. */
1693	serge	268	static char lc_message_charset[ENCODING_LEN + 1] = "ASCII";
			/* Non-zero when "CJK Ambiguous Width" characters count as double width;
			   computed in loadlocale(), returned by __locale_cjk_lang(). */
		269	static int lc_ctype_cjk_lang = 0;
		270
		271	char *
		272	_DEFUN(_setlocale_r, (p, category, locale),
		273	struct _reent *p _AND
		274	int category _AND
		275	_CONST char *locale)
		276	{
		277	#ifndef _MB_CAPABLE
		278	if (locale)
		279	{
		280	if (strcmp (locale, "POSIX") && strcmp (locale, "C")
		281	&& strcmp (locale, ""))
		282	return NULL;
		283	}
		284	return "C";
		285	#else /* !_MB_CAPABLE */
		286	int i, j, len, saverr;
		287	const char env, r;
		288
		289	if (category < LC_ALL \|\| category >= _LC_LAST)
		290	{
		291	p->_errno = EINVAL;
		292	return NULL;
		293	}
		294
		295	if (locale == NULL)
		296	return category != LC_ALL ? current_categories[category] : currentlocale();
		297
		298	/*
		299	* Default to the current locale for everything.
		300	*/
		301	for (i = 1; i < _LC_LAST; ++i)
		302	strcpy (new_categories[i], current_categories[i]);
		303
		304	/*
		305	* Now go fill up new_categories from the locale argument
		306	*/
		307	if (!*locale)
		308	{
		309	if (category == LC_ALL)
		310	{
		311	for (i = 1; i < _LC_LAST; ++i)
		312	{
		313	env = __get_locale_env (p, i);
		314	if (strlen (env) > ENCODING_LEN)
		315	{
		316	p->_errno = EINVAL;
		317	return NULL;
		318	}
		319	strcpy (new_categories[i], env);
		320	}
		321	}
		322	else
		323	{
		324	env = __get_locale_env (p, category);
		325	if (strlen (env) > ENCODING_LEN)
		326	{
		327	p->_errno = EINVAL;
		328	return NULL;
		329	}
		330	strcpy (new_categories[category], env);
		331	}
		332	}
		333	else if (category != LC_ALL)
		334	{
		335	if (strlen (locale) > ENCODING_LEN)
		336	{
		337	p->_errno = EINVAL;
		338	return NULL;
		339	}
		340	strcpy (new_categories[category], locale);
		341	}
		342	else
		343	{
		344	if ((r = strchr (locale, '/')) == NULL)
		345	{
		346	if (strlen (locale) > ENCODING_LEN)
		347	{
		348	p->_errno = EINVAL;
		349	return NULL;
		350	}
		351	for (i = 1; i < _LC_LAST; ++i)
		352	strcpy (new_categories[i], locale);
		353	}
		354	else
		355	{
		356	for (i = 1; r[1] == '/'; ++r)
		357	;
		358	if (!r[1])
		359	{
		360	p->_errno = EINVAL;
		361	return NULL; /* Hmm, just slashes... */
		362	}
		363	do
		364	{
		365	if (i == _LC_LAST)
		366	break; /* Too many slashes... */
		367	if ((len = r - locale) > ENCODING_LEN)
		368	{
		369	p->_errno = EINVAL;
		370	return NULL;
		371	}
		372	strlcpy (new_categories[i], locale, len + 1);
		373	i++;
		374	while (*r == '/')
		375	r++;
		376	locale = r;
		377	while (r && r != '/')
		378	r++;
		379	}
		380	while (*locale);
		381	while (i < _LC_LAST)
		382	{
		383	strcpy (new_categories[i], new_categories[i-1]);
		384	i++;
		385	}
		386	}
		387	}
		388
		389	if (category != LC_ALL)
		390	return loadlocale (p, category);
		391
		392	for (i = 1; i < _LC_LAST; ++i)
		393	{
		394	strcpy (saved_categories[i], current_categories[i]);
		395	if (loadlocale (p, i) == NULL)
		396	{
		397	saverr = p->_errno;
		398	for (j = 1; j < i; j++)
		399	{
		400	strcpy (new_categories[j], saved_categories[j]);
		401	if (loadlocale (p, j) == NULL)
		402	{
		403	strcpy (new_categories[j], "C");
		404	loadlocale (p, j);
		405	}
		406	}
		407	p->_errno = saverr;
		408	return NULL;
		409	}
		410	}
		411	return currentlocale ();
		412	#endif /* !_MB_CAPABLE */
		413	}
		414
		415	#ifdef _MB_CAPABLE
		416	static char *
		417	currentlocale()
		418	{
		419	int i;
		420
		421	(void)strcpy(current_locale_string, current_categories[1]);
		422
		423	for (i = 2; i < _LC_LAST; ++i)
		424	if (strcmp(current_categories[1], current_categories[i])) {
		425	for (i = 2; i < _LC_LAST; ++i) {
		426	(void)strcat(current_locale_string, "/");
		427	(void)strcat(current_locale_string,
		428	current_categories[i]);
		429	}
		430	break;
		431	}
		432	return (current_locale_string);
		433	}
		434	#endif /* _MB_CAPABLE */
		435
		436	#ifdef _MB_CAPABLE
		437	#ifdef __CYGWIN__
			/* Cygwin-only helpers used by loadlocale(): the default charset for a
			   locale, the locale.alias lookup, and the collation loader. */
		438	extern void __set_charset_from_locale (const char *locale, char *charset);
3065	serge	439	extern char *__set_locale_from_locale_alias (const char *, char *);
1693	serge	440	extern int __collate_load_locale (const char *, void *, const char *);
		441	#endif /* __CYGWIN__ */
		442
			/* Called by loadlocale() with the new LC_CTYPE charset name. */
		443	extern void __set_ctype (const char *charset);
		444
		445	static char *
		446	loadlocale(struct _reent *p, int category)
		447	{
		448	/* At this point a full-featured system would just load the locale
		449	specific data from the locale files.
		450	What we do here for now is to check the incoming string for correctness.
		451	The string must be in one of the allowed locale strings, either
		452	one in POSIX-style, or one in the old newlib style to maintain
		453	backward compatibility. If the local string is correct, the charset
		454	is extracted and stored in lc_ctype_charset or lc_message_charset
		455	dependent on the cateogry. */
		456	char *locale = NULL;
		457	char charset[ENCODING_LEN + 1];
		458	unsigned long val;
3065	serge	459	char end, c = NULL;
1693	serge	460	int mbc_max;
		461	int (l_wctomb) (struct _reent , char , wchar_t, const char , mbstate_t *);
		462	int (l_mbtowc) (struct _reent , wchar_t , const char , size_t,
		463	const char , mbstate_t );
		464	int cjknarrow = 0;
		465
		466	/* Avoid doing everything twice if nothing has changed. */
		467	if (!strcmp (new_categories[category], current_categories[category]))
		468	return current_categories[category];
		469
		470	#ifdef __CYGWIN__
		471	/* This additional code handles the case that the incoming locale string
		472	is not valid. If so, it calls the function __set_locale_from_locale_alias,
		473	which is only available on Cygwin right now. The function reads the
		474	file /usr/share/locale/locale.alias. The file contains locale aliases
		475	and their replacement locale. For instance, the alias "french" is
		476	translated to "fr_FR.ISO-8859-1", the alias "thai" is translated to
		477	"th_TH.TIS-620". If successful, the function returns with a pointer
		478	to the second argument, which is a buffer in which the replacement locale
		479	gets stored. Otherwise the function returns NULL. */
		480	char tmp_locale[ENCODING_LEN + 1];
		481	int ret = 0;
		482
		483	restart:
		484	if (!locale)
		485	locale = new_categories[category];
		486	else if (locale != tmp_locale)
		487	{
		488	locale = __set_locale_from_locale_alias (locale, tmp_locale);
		489	if (!locale)
		490	return NULL;
		491	}
		492	# define FAIL goto restart
		493	#else
		494	locale = new_categories[category];
		495	# define FAIL return NULL
		496	#endif
		497
		498	/* "POSIX" is translated to "C", as on Linux. */
		499	if (!strcmp (locale, "POSIX"))
		500	strcpy (locale, "C");
		501	if (!strcmp (locale, "C")) /* Default "C" locale */
		502	strcpy (charset, "ASCII");
		503	else if (locale[0] == 'C'
		504	&& (locale[1] == '-' /* Old newlib style */
		505	\|\| locale[1] == '.')) /* Extension for the C locale to allow
		506	specifying different charsets while
		507	sticking to the C locale in terms
		508	of sort order, etc. Proposed in
		509	the Debian project. */
3065	serge	510	{
		511	char *chp;
		512
		513	c = locale + 2;
		514	strcpy (charset, c);
		515	if ((chp = strchr (charset, '@')))
		516	/* Strip off modifier */
		517	*chp = '\0';
		518	c += strlen (charset);
		519	}
1693	serge	520	else /* POSIX style */
		521	{
		522	c = locale;
		523
		524	/* Don't use ctype macros here, they might be localized. */
		525	/* Language */
		526	if (c[0] < 'a' \|\| c[0] > 'z'
		527	\|\| c[1] < 'a' \|\| c[1] > 'z')
		528	FAIL;
		529	c += 2;
		530	/* Allow three character Language per ISO 639-3 */
		531	if (c[0] >= 'a' && c[0] <= 'z')
		532	++c;
		533	if (c[0] == '_')
		534	{
		535	/* Territory */
		536	++c;
		537	if (c[0] < 'A' \|\| c[0] > 'Z'
		538	\|\| c[1] < 'A' \|\| c[1] > 'Z')
		539	FAIL;
		540	c += 2;
		541	}
		542	if (c[0] == '.')
		543	{
		544	/* Charset */
		545	char *chp;
		546
		547	++c;
		548	strcpy (charset, c);
		549	if ((chp = strchr (charset, '@')))
		550	/* Strip off modifier */
		551	*chp = '\0';
		552	c += strlen (charset);
		553	}
		554	else if (c[0] == '\0' \|\| c[0] == '@')
		555	/* End of string or just a modifier */
		556	#ifdef __CYGWIN__
		557	/* The Cygwin-only function __set_charset_from_locale checks
		558	for the default charset which is connected to the given locale.
		559	The function uses Windows functions in turn so it can't be easily
		560	adapted to other targets. However, if any other target provides
		561	equivalent functionality, preferrably using the same function name
		562	it would be sufficient to change the guarding #ifdef. */
		563	__set_charset_from_locale (locale, charset);
		564	#else
		565	strcpy (charset, "ISO-8859-1");
		566	#endif
		567	else
		568	/* Invalid string */
		569	FAIL;
3065	serge	570	}
		571	if (c && c[0] == '@')
1693	serge	572	{
		573	/* Modifier */
		574	/* Only one modifier is recognized right now. "cjknarrow" is used
		575	to modify the behaviour of wcwidth() for East Asian languages.
		576	For details see the comment at the end of this function. */
		577	if (!strcmp (c + 1, "cjknarrow"))
		578	cjknarrow = 1;
		579	}
		580	/* We only support this subset of charsets. */
		581	switch (charset[0])
		582	{
		583	case 'U':
		584	case 'u':
		585	if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
		586	FAIL;
		587	strcpy (charset, "UTF-8");
		588	mbc_max = 6;
		589	l_wctomb = __utf8_wctomb;
		590	l_mbtowc = __utf8_mbtowc;
		591	break;
		592	#ifndef __CYGWIN__
		593	/* Cygwin does not support JIS at all. */
		594	case 'J':
		595	case 'j':
		596	if (strcasecmp (charset, "JIS"))
		597	FAIL;
		598	strcpy (charset, "JIS");
		599	mbc_max = 8;
		600	l_wctomb = __jis_wctomb;
		601	l_mbtowc = __jis_mbtowc;
		602	break;
		603	#endif /* !__CYGWIN__ */
		604	case 'E':
		605	case 'e':
		606	if (strncasecmp (charset, "EUC", 3))
		607	FAIL;
		608	c = charset + 3;
		609	if (*c == '-')
		610	++c;
		611	if (!strcasecmp (c, "JP"))
		612	{
		613	strcpy (charset, "EUCJP");
		614	mbc_max = 3;
		615	l_wctomb = __eucjp_wctomb;
		616	l_mbtowc = __eucjp_mbtowc;
		617	}
		618	#ifdef __CYGWIN__
		619	/* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's
		620	implementation requires Windows support. */
		621	else if (!strcasecmp (c, "KR"))
		622	{
		623	strcpy (charset, "EUCKR");
		624	mbc_max = 2;
		625	l_wctomb = __kr_wctomb;
		626	l_mbtowc = __kr_mbtowc;
		627	}
		628	else if (!strcasecmp (c, "CN"))
		629	{
		630	strcpy (charset, "EUCCN");
		631	mbc_max = 2;
		632	l_wctomb = __gbk_wctomb;
		633	l_mbtowc = __gbk_mbtowc;
		634	}
		635	#endif /* __CYGWIN__ */
		636	else
		637	FAIL;
		638	break;
		639	case 'S':
		640	case 's':
		641	if (strcasecmp (charset, "SJIS"))
		642	FAIL;
		643	strcpy (charset, "SJIS");
		644	mbc_max = 2;
		645	l_wctomb = __sjis_wctomb;
		646	l_mbtowc = __sjis_mbtowc;
		647	break;
		648	case 'I':
		649	case 'i':
		650	/* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
		651	ISO-8859-12. This code also recognizes the aliases without dashes. */
		652	if (strncasecmp (charset, "ISO", 3))
		653	FAIL;
		654	c = charset + 3;
		655	if (*c == '-')
		656	++c;
		657	if (strncasecmp (c, "8859", 4))
		658	FAIL;
		659	c += 4;
		660	if (*c == '-')
		661	++c;
		662	val = _strtol_r (p, c, &end, 10);
		663	if (val < 1 \|\| val > 16 \|\| val == 12 \|\| *end)
		664	FAIL;
		665	strcpy (charset, "ISO-8859-");
		666	c = charset + 9;
		667	if (val > 10)
		668	*c++ = '1';
		669	*c++ = val % 10 + '0';
		670	*c = '\0';
		671	mbc_max = 1;
		672	#ifdef _MB_EXTENDED_CHARSETS_ISO
		673	l_wctomb = __iso_wctomb;
		674	l_mbtowc = __iso_mbtowc;
		675	#else /* !_MB_EXTENDED_CHARSETS_ISO */
		676	l_wctomb = __ascii_wctomb;
		677	l_mbtowc = __ascii_mbtowc;
		678	#endif /* _MB_EXTENDED_CHARSETS_ISO */
		679	break;
		680	case 'C':
		681	case 'c':
		682	if (charset[1] != 'P' && charset[1] != 'p')
		683	FAIL;
		684	strncpy (charset, "CP", 2);
		685	val = _strtol_r (p, charset + 2, &end, 10);
		686	if (*end)
		687	FAIL;
		688	switch (val)
		689	{
		690	case 437:
		691	case 720:
		692	case 737:
		693	case 775:
		694	case 850:
		695	case 852:
		696	case 855:
		697	case 857:
		698	case 858:
		699	case 862:
		700	case 866:
		701	case 874:
		702	case 1125:
		703	case 1250:
		704	case 1251:
		705	case 1252:
		706	case 1253:
		707	case 1254:
		708	case 1255:
		709	case 1256:
		710	case 1257:
		711	case 1258:
		712	mbc_max = 1;
		713	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
		714	l_wctomb = __cp_wctomb;
		715	l_mbtowc = __cp_mbtowc;
		716	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
		717	l_wctomb = __ascii_wctomb;
		718	l_mbtowc = __ascii_mbtowc;
		719	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
		720	break;
		721	case 932:
		722	mbc_max = 2;
		723	l_wctomb = __sjis_wctomb;
		724	l_mbtowc = __sjis_mbtowc;
		725	break;
		726	default:
		727	FAIL;
		728	}
		729	break;
		730	case 'K':
		731	case 'k':
		732	/* KOI8-R, KOI8-U and the aliases without dash */
		733	if (strncasecmp (charset, "KOI8", 4))
		734	FAIL;
		735	c = charset + 4;
		736	if (*c == '-')
		737	++c;
		738	if (c == 'R' \|\| c == 'r')
		739	strcpy (charset, "CP20866");
		740	else if (c == 'U' \|\| c == 'u')
		741	strcpy (charset, "CP21866");
		742	else
		743	FAIL;
		744	mbc_max = 1;
		745	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
		746	l_wctomb = __cp_wctomb;
		747	l_mbtowc = __cp_mbtowc;
		748	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
		749	l_wctomb = __ascii_wctomb;
		750	l_mbtowc = __ascii_mbtowc;
		751	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
		752	break;
		753	case 'A':
		754	case 'a':
		755	if (strcasecmp (charset, "ASCII"))
		756	FAIL;
		757	strcpy (charset, "ASCII");
		758	mbc_max = 1;
		759	l_wctomb = __ascii_wctomb;
		760	l_mbtowc = __ascii_mbtowc;
		761	break;
		762	case 'G':
		763	case 'g':
		764	#ifdef __CYGWIN__
		765	/* Newlib does not provide GBK/GB2312 and Cygwin's implementation
		766	requires Windows support. */
		767	if (!strcasecmp (charset, "GBK")
		768	\|\| !strcasecmp (charset, "GB2312"))
		769	{
		770	strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
		771	mbc_max = 2;
		772	l_wctomb = __gbk_wctomb;
		773	l_mbtowc = __gbk_mbtowc;
		774	}
		775	else
		776	#endif /* __CYGWIN__ */
		777	/* GEORGIAN-PS and the alias without dash */
		778	if (!strncasecmp (charset, "GEORGIAN", 8))
		779	{
		780	c = charset + 8;
		781	if (*c == '-')
		782	++c;
		783	if (strcasecmp (c, "PS"))
		784	FAIL;
		785	strcpy (charset, "CP101");
		786	mbc_max = 1;
		787	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
		788	l_wctomb = __cp_wctomb;
		789	l_mbtowc = __cp_mbtowc;
		790	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
		791	l_wctomb = __ascii_wctomb;
		792	l_mbtowc = __ascii_mbtowc;
		793	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
		794	}
		795	else
		796	FAIL;
		797	break;
		798	case 'P':
		799	case 'p':
		800	/* PT154 */
		801	if (strcasecmp (charset, "PT154"))
		802	FAIL;
		803	strcpy (charset, "CP102");
		804	mbc_max = 1;
		805	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
		806	l_wctomb = __cp_wctomb;
		807	l_mbtowc = __cp_mbtowc;
		808	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
		809	l_wctomb = __ascii_wctomb;
		810	l_mbtowc = __ascii_mbtowc;
		811	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
		812	break;
		813	case 'T':
		814	case 't':
		815	if (strncasecmp (charset, "TIS", 3))
		816	FAIL;
		817	c = charset + 3;
		818	if (*c == '-')
		819	++c;
		820	if (strcasecmp (c, "620"))
		821	FAIL;
		822	strcpy (charset, "CP874");
		823	mbc_max = 1;
		824	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
		825	l_wctomb = __cp_wctomb;
		826	l_mbtowc = __cp_mbtowc;
		827	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
		828	l_wctomb = __ascii_wctomb;
		829	l_mbtowc = __ascii_mbtowc;
		830	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
		831	break;
		832	#ifdef __CYGWIN__
		833	/* Newlib does not provide Big5 and Cygwin's implementation
		834	requires Windows support. */
		835	case 'B':
		836	case 'b':
		837	if (strcasecmp (charset, "BIG5"))
		838	FAIL;
		839	strcpy (charset, "BIG5");
		840	mbc_max = 2;
		841	l_wctomb = __big5_wctomb;
		842	l_mbtowc = __big5_mbtowc;
		843	break;
		844	#endif /* __CYGWIN__ */
		845	default:
		846	FAIL;
		847	}
		848	switch (category)
		849	{
		850	case LC_CTYPE:
		851	strcpy (lc_ctype_charset, charset);
		852	__mb_cur_max = mbc_max;
		853	__wctomb = l_wctomb;
		854	__mbtowc = l_mbtowc;
		855	__set_ctype (charset);
3065	serge	856	/* Determine the width for the "CJK Ambiguous Width" category of
		857	characters. This is used in wcwidth(). Assume single width for
		858	single-byte charsets, and double width for multi-byte charsets
		859	other than UTF-8. For UTF-8, use double width for the East Asian
		860	languages ("ja", "ko", "zh"), and single width for everything else.
		861	Single width can also be forced with the "@cjknarrow" modifier. */
1693	serge	862	lc_ctype_cjk_lang = !cjknarrow
3065	serge	863	&& mbc_max > 1
		864	&& (charset[0] != 'U'
		865	\|\| strncmp (locale, "ja", 2) == 0
1693	serge	866	\|\| strncmp (locale, "ko", 2) == 0
3065	serge	867	\|\| strncmp (locale, "zh", 2) == 0);
1693	serge	868	#ifdef __HAVE_LOCALE_INFO__
		869	ret = __ctype_load_locale (locale, (void *) l_wctomb, charset, mbc_max);
		870	#endif /* __HAVE_LOCALE_INFO__ */
		871	break;
		872	case LC_MESSAGES:
		873	strcpy (lc_message_charset, charset);
		874	#ifdef __HAVE_LOCALE_INFO__
		875	ret = __messages_load_locale (locale, (void *) l_wctomb, charset);
		876	if (!ret)
		877	#endif /* __HAVE_LOCALE_INFO__ */
		878	break;
		879	#ifdef __HAVE_LOCALE_INFO__
		880	#ifdef __CYGWIN__
		881	/* Right now only Cygwin supports a __collate_load_locale function at all. */
		882	case LC_COLLATE:
		883	ret = __collate_load_locale (locale, (void *) l_mbtowc, charset);
		884	break;
		885	#endif
		886	case LC_MONETARY:
		887	ret = __monetary_load_locale (locale, (void *) l_wctomb, charset);
		888	break;
		889	case LC_NUMERIC:
		890	ret = __numeric_load_locale (locale, (void *) l_wctomb, charset);
		891	break;
		892	case LC_TIME:
		893	ret = __time_load_locale (locale, (void *) l_wctomb, charset);
		894	break;
		895	#endif /* __HAVE_LOCALE_INFO__ */
		896	default:
		897	break;
		898	}
		899	#ifdef __HAVE_LOCALE_INFO__
		900	if (ret)
		901	FAIL;
		902	#endif /* __HAVE_LOCALE_INFO__ */
		903	return strcpy(current_categories[category], new_categories[category]);
		904	}
		905
		906	static const char *
		907	__get_locale_env(struct _reent *p, int category)
		908	{
		909	const char *env;
		910
		911	/* 1. check LC_ALL. */
		912	env = _getenv_r (p, categories[0]);
		913
		914	/* 2. check LC_* */
		915	if (env == NULL \|\| !*env)
		916	env = _getenv_r (p, categories[category]);
		917
		918	/* 3. check LANG */
		919	if (env == NULL \|\| !*env)
		920	env = _getenv_r (p, "LANG");
		921
		922	/* 4. if none is set, fall to default locale */
		923	if (env == NULL \|\| !*env)
		924	env = __default_locale;
		925
		926	return env;
		927	}
		928	#endif /* _MB_CAPABLE */
		929
		930	char *
		931	_DEFUN_VOID(__locale_charset)
		932	{
		933	#if 0//def __HAVE_LOCALE_INFO__
		934	return __get_current_ctype_locale ()->codeset;
		935	#else
		936	return lc_ctype_charset;
		937	#endif
		938	}
		939
		940	int
		941	_DEFUN_VOID(__locale_mb_cur_max)
		942	{
		943	#if 0//def __HAVE_LOCALE_INFO__
		944	return __get_current_ctype_locale ()->mb_cur_max[0];
		945	#else
		946	return __mb_cur_max;
		947	#endif
		948	}
		949
		950
		951	char *
		952	_DEFUN_VOID(__locale_msgcharset)
		953	{
		954	#ifdef __HAVE_LOCALE_INFO__
3065	serge	955	return (char *) __get_current_messages_locale ()->codeset;
1693	serge	956	#else
		957	return lc_message_charset;
		958	#endif
		959	}
		960
		961	int
		962	_DEFUN_VOID(__locale_cjk_lang)
		963	{
		964	return lc_ctype_cjk_lang;
		965	}
		966
		967	struct lconv *
		968	_DEFUN(_localeconv_r, (data),
		969	struct _reent *data)
		970	{
		971	#ifdef __HAVE_LOCALE_INFO__
		972	if (__nlocale_changed)
		973	{
		974	struct lc_numeric_T *n = __get_current_numeric_locale ();
3065	serge	975	lconv.decimal_point = (char *) n->decimal_point;
		976	lconv.thousands_sep = (char *) n->thousands_sep;
		977	lconv.grouping = (char *) n->grouping;
1693	serge	978	__nlocale_changed = 0;
		979	}
		980	if (__mlocale_changed)
		981	{
		982	struct lc_monetary_T *m = __get_current_monetary_locale ();
3065	serge	983	lconv.int_curr_symbol = (char *) m->int_curr_symbol;
		984	lconv.currency_symbol = (char *) m->currency_symbol;
		985	lconv.mon_decimal_point = (char *) m->mon_decimal_point;
		986	lconv.mon_thousands_sep = (char *) m->mon_thousands_sep;
		987	lconv.mon_grouping = (char *) m->mon_grouping;
		988	lconv.positive_sign = (char *) m->positive_sign;
		989	lconv.negative_sign = (char *) m->negative_sign;
1693	serge	990	lconv.int_frac_digits = m->int_frac_digits[0];
		991	lconv.frac_digits = m->frac_digits[0];
		992	lconv.p_cs_precedes = m->p_cs_precedes[0];
		993	lconv.p_sep_by_space = m->p_sep_by_space[0];
		994	lconv.n_cs_precedes = m->n_cs_precedes[0];
		995	lconv.n_sep_by_space = m->n_sep_by_space[0];
		996	lconv.p_sign_posn = m->p_sign_posn[0];
		997	lconv.n_sign_posn = m->n_sign_posn[0];
		998	#ifdef __HAVE_LOCALE_INFO_EXTENDED__
		999	lconv.int_p_cs_precedes = m->int_p_cs_precedes[0];
		1000	lconv.int_p_sep_by_space = m->int_p_sep_by_space[0];
		1001	lconv.int_n_cs_precedes = m->int_n_cs_precedes[0];
		1002	lconv.int_n_sep_by_space = m->int_n_sep_by_space[0];
		1003	lconv.int_n_sign_posn = m->int_n_sign_posn[0];
		1004	lconv.int_p_sign_posn = m->int_p_sign_posn[0];
		1005	#else /* !__HAVE_LOCALE_INFO_EXTENDED__ */
		1006	lconv.int_p_cs_precedes = m->p_cs_precedes[0];
		1007	lconv.int_p_sep_by_space = m->p_sep_by_space[0];
		1008	lconv.int_n_cs_precedes = m->n_cs_precedes[0];
		1009	lconv.int_n_sep_by_space = m->n_sep_by_space[0];
		1010	lconv.int_n_sign_posn = m->n_sign_posn[0];
		1011	lconv.int_p_sign_posn = m->p_sign_posn[0];
		1012	#endif /* !__HAVE_LOCALE_INFO_EXTENDED__ */
		1013	__mlocale_changed = 0;
		1014	}
		1015	#endif /* __HAVE_LOCALE_INFO__ */
		1016	return (struct lconv *) &lconv;
		1017	}
		1018
		1019	#ifndef _REENT_ONLY
		1020
		1021	#ifndef __CYGWIN__
		1022	/* Cygwin provides its own version of setlocale to perform some more
		1023	initialization work. It calls _setlocale_r, though. */
		1024	char *
		1025	_DEFUN(setlocale, (category, locale),
		1026	int category _AND
		1027	_CONST char *locale)
		1028	{
		1029	return _setlocale_r (_REENT, category, locale);
		1030	}
		1031	#endif /* __CYGWIN__ */
		1032
		1033	struct lconv *
		1034	_DEFUN_VOID(localeconv)
		1035	{
		1036	return _localeconv_r (_REENT);
		1037	}
		1038
		1039	#endif

Subversion Repositories Kolibri OS

(root)/programs/develop/libraries/newlib/locale/locale.c – Rev 3072