Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3584 sourcerer 1
/*
2
 * This file is part of libdom.
3
 * Licensed under the MIT License,
4
 *			http://www.opensource.org/licenses/mit-license.php
5
 * Copyright 2009 Bo Yang 
6
 */
7
 
8
#include 
9
#include 
10
 
11
#include "utils/validate.h"
12
 
13
#include 
14
 
15
#include "utils/character_valid.h"
16
#include "utils/namespace.h"
17
#include "utils/utils.h"
18
 
19
#include 
20
 
21
/* A combination of various character tests */
22
static bool is_first_char(uint32_t ch);
23
static bool is_name_char(uint32_t ch);
24
 
25
/**
 * Test whether a code point may appear as the first character of a Name.
 *
 * The colon is accepted here, so a caller that needs NCName semantics
 * has to reject ':' itself.
 *
 * \param ch  The code point to test
 * \return true if ::ch may start a Name, false otherwise.
 */
static bool is_first_char(uint32_t ch)
{
	/* Inclusive code point ranges accepted as a name start.
	 * Refer http://www.w3.org/TR/REC-xml/ for detail */
	static const struct {
		uint32_t lo;
		uint32_t hi;
	} start_ranges[] = {
		{ 'a', 'z' },
		{ 'A', 'Z' },
		{ '_', '_' },
		{ ':', ':' },
		{ 0xC0, 0xD6 },
		{ 0xD8, 0xF6 },
		{ 0xF8, 0x2FF },
		{ 0x370, 0x37D },
		{ 0x37F, 0x1FFF },
		{ 0x200C, 0x200D },
		{ 0x2070, 0x218F },
		{ 0x2C00, 0x2FEF },
		{ 0x3001, 0xD7FF },
		{ 0xF900, 0xFDCF },
		{ 0xFDF0, 0xFFFD },
		{ 0x10000, 0xEFFFF }
	};
	size_t i;

	for (i = 0; i < sizeof(start_ranges) / sizeof(start_ranges[0]); i++) {
		if (ch >= start_ranges[i].lo && ch <= start_ranges[i].hi)
			return true;
	}

	/* '_' and ':' were already matched by the table, so the only
	 * remaining way to qualify is through is_letter(). */
	if (is_letter(ch))
		return true;

	return false;
}
53
 
54
/**
 * Test whether a code point may appear inside a Name (at any position
 * after the first).
 *
 * The colon is accepted here, so a caller that needs NCName semantics
 * has to reject ':' itself.
 *
 * \param ch  The code point to test
 * \return true if ::ch is a legal name character, false otherwise.
 */
static bool is_name_char(uint32_t ch)
{
	/* Inclusive code point ranges accepted inside a name.
	 * Entries marked "!start" are not accepted by is_first_char().
	 * Refer http://www.w3.org/TR/REC-xml/ for detail */
	static const struct {
		uint32_t lo;
		uint32_t hi;
	} name_ranges[] = {
		{ 'a', 'z' },
		{ 'A', 'Z' },
		{ '0', '9' },		/* !start */
		{ '_', '_' },
		{ ':', ':' },
		{ '-', '-' },		/* !start */
		{ '.', '.' },		/* !start */
		{ 0xB7, 0xB7 },		/* !start */
		{ 0xC0, 0xD6 },
		{ 0xD8, 0xF6 },
		{ 0xF8, 0x2FF },
		{ 0x300, 0x36F },	/* !start */
		{ 0x370, 0x37D },
		{ 0x37F, 0x1FFF },
		{ 0x200C, 0x200D },
		{ 0x203F, 0x2040 },	/* !start */
		{ 0x2070, 0x218F },
		{ 0x2C00, 0x2FEF },
		{ 0x3001, 0xD7FF },
		{ 0xF900, 0xFDCF },
		{ 0xFDF0, 0xFFFD },
		{ 0x10000, 0xEFFFF }
	};
	size_t i;

	for (i = 0; i < sizeof(name_ranges) / sizeof(name_ranges[0]); i++) {
		if (ch >= name_ranges[i].lo && ch <= name_ranges[i].hi)
			return true;
	}

	/* The punctuation characters were already matched by the table;
	 * what is left is decided by the character class tests, tried in
	 * the same order as before. */
	if (is_letter(ch) || is_digit(ch) ||
			is_combining_char(ch) || is_extender(ch))
		return true;

	return false;
}
94
 
95
/**
96
 * Test whether the name is a valid one according XML 1.0 standard.
97
 * For the standard please refer:
98
 *
99
 * http://www.w3.org/TR/2004/REC-xml-20040204/
100
 *
101
 * \param name  The name need to be tested
102
 * \return true if ::name is valid, false otherwise.
103
 */
104
bool _dom_validate_name(dom_string *name)
105
{
106
	uint32_t ch;
107
	size_t clen, slen;
108
	parserutils_error err;
109
	const uint8_t *s;
110
 
111
	if (name == NULL)
112
		return false;
113
 
114
	slen = dom_string_length(name);
115
	if (slen == 0)
116
		return false;
117
 
118
	s = (const uint8_t *) dom_string_data(name);
119
	slen = dom_string_byte_length(name);
120
 
121
	err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen);
122
	if (err != PARSERUTILS_OK) {
123
		return false;
124
	}
125
 
126
	if (is_first_char(ch) == false)
127
		return false;
128
 
129
	s += clen;
130
	slen -= clen;
131
 
132
	while (slen > 0) {
133
		err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen);
134
		if (err != PARSERUTILS_OK) {
135
			return false;
136
		}
137
 
138
		if (is_name_char(ch) == false)
139
			return false;
140
 
141
		s += clen;
142
		slen -= clen;
143
	}
144
 
145
	return true;
146
}
147
 
148
/**
149
 * Validate whether the string is a legal NCName.
150
 * Refer http://www.w3.org/TR/REC-xml-names/ for detail.
151
 *
152
 * \param str  The name to validate
153
 * \return true if ::name is valid, false otherwise.
154
 */
155
bool _dom_validate_ncname(dom_string *name)
156
{
157
	uint32_t ch;
158
	size_t clen, slen;
159
	parserutils_error err;
160
	const uint8_t *s;
161
 
162
	if (name == NULL)
163
		return false;
164
 
165
	slen = dom_string_length(name);
166
	if (slen == 0)
167
		return false;
168
 
169
	s = (const uint8_t *) dom_string_data(name);
170
	slen = dom_string_byte_length(name);
171
 
172
	err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen);
173
	if (err != PARSERUTILS_OK) {
174
		return false;
175
	}
176
 
177
	if (is_letter(ch) == false && ch != (uint32_t) '_')
178
		return false;
179
 
180
	s += clen;
181
	slen -= clen;
182
 
183
	while (slen > 0) {
184
		err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen);
185
		if (err != PARSERUTILS_OK) {
186
			return false;
187
		}
188
 
189
		if (is_name_char(ch) == false)
190
			return false;
191
 
192
		if (ch == (uint32_t) ':')
193
			return false;
194
 
195
		s += clen;
196
		slen -= clen;
197
	}
198
 
199
	return true;
200
}
201