Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
3584 | sourcerer | 1 | /* |
2 | * This file is part of libdom. |
||
3 | * Licensed under the MIT License, |
||
4 | * http://www.opensource.org/licenses/mit-license.php |
||
5 | * Copyright 2009 Bo Yang |
||
6 | */ |
||
7 | |||
8 | #include |
||
9 | #include |
||
10 | |||
11 | #include "utils/validate.h" |
||
12 | |||
13 | #include |
||
14 | |||
15 | #include "utils/character_valid.h" |
||
16 | #include "utils/namespace.h" |
||
17 | #include "utils/utils.h" |
||
18 | |||
19 | #include |
||
20 | |||
21 | /* Forward declarations for the file-local character tests defined below */ |
||
22 | static bool is_first_char(uint32_t ch); |
||
23 | static bool is_name_char(uint32_t ch); |
||
24 | |||
/**
 * Test whether a character may be the first character of an XML Name.
 *
 * The explicit ranges below follow the NameStartChar production; refer to
 * http://www.w3.org/TR/REC-xml/ for detail. A character outside those
 * ranges is still accepted when is_letter() recognises it.
 *
 * ':' is accepted here, so this test alone is not sufficient for the
 * first character of an NCName.
 *
 * \param ch  The character to test, as a UCS-4 code point
 * \return true if ::ch may start a Name, false otherwise.
 */
static bool is_first_char(uint32_t ch)
{
	if (((ch >= 'a') && (ch <= 'z')) ||
			((ch >= 'A') && (ch <= 'Z')) ||
			(ch == '_') || (ch == ':') ||
			((ch >= 0xC0) && (ch <= 0xD6)) ||
			((ch >= 0xD8) && (ch <= 0xF6)) ||
			((ch >= 0xF8) && (ch <= 0x2FF)) ||
			((ch >= 0x370) && (ch <= 0x37D)) ||
			((ch >= 0x37F) && (ch <= 0x1FFF)) ||
			((ch >= 0x200C) && (ch <= 0x200D)) ||
			((ch >= 0x2070) && (ch <= 0x218F)) ||
			((ch >= 0x2C00) && (ch <= 0x2FEF)) ||
			((ch >= 0x3001) && (ch <= 0xD7FF)) ||
			((ch >= 0xF900) && (ch <= 0xFDCF)) ||
			((ch >= 0xFDF0) && (ch <= 0xFFFD)) ||
			((ch >= 0x10000) && (ch <= 0xEFFFF))) {
		return true;
	}

	/* '_' and ':' have already been accepted by the range test above,
	 * so only the letter classification is left to consult. */
	return is_letter(ch);
}
||
53 | |||
/**
 * Test whether a character may appear inside an XML Name.
 *
 * The explicit ranges below follow the NameChar production; refer to
 * http://www.w3.org/TR/REC-xml/ for detail. Lines tagged "!start" are the
 * characters that are allowed inside a Name but not as its first
 * character. A character outside all of the ranges is still accepted when
 * is_letter(), is_digit(), is_combining_char() or is_extender()
 * recognises it.
 *
 * ':' is accepted here, so callers validating an NCName must reject it
 * themselves.
 *
 * \param ch  The character to test, as a UCS-4 code point
 * \return true if ::ch may appear in a Name, false otherwise.
 */
static bool is_name_char(uint32_t ch)
{
	if (((ch >= 'a') && (ch <= 'z')) ||
			((ch >= 'A') && (ch <= 'Z')) ||
			((ch >= '0') && (ch <= '9')) || /* !start */
			(ch == '_') || (ch == ':') ||
			(ch == '-') || (ch == '.') || (ch == 0xB7) || /* !start */
			((ch >= 0xC0) && (ch <= 0xD6)) ||
			((ch >= 0xD8) && (ch <= 0xF6)) ||
			((ch >= 0xF8) && (ch <= 0x2FF)) ||
			((ch >= 0x300) && (ch <= 0x36F)) || /* !start */
			((ch >= 0x370) && (ch <= 0x37D)) ||
			((ch >= 0x37F) && (ch <= 0x1FFF)) ||
			((ch >= 0x200C) && (ch <= 0x200D)) ||
			((ch >= 0x203F) && (ch <= 0x2040)) || /* !start */
			((ch >= 0x2070) && (ch <= 0x218F)) ||
			((ch >= 0x2C00) && (ch <= 0x2FEF)) ||
			((ch >= 0x3001) && (ch <= 0xD7FF)) ||
			((ch >= 0xF900) && (ch <= 0xFDCF)) ||
			((ch >= 0xFDF0) && (ch <= 0xFFFD)) ||
			((ch >= 0x10000) && (ch <= 0xEFFFF))) {
		return true;
	}

	/* '.', '-', '_' and ':' have already been accepted by the range test
	 * above, so only the character-class lookups are left to consult. */
	return is_letter(ch) || is_digit(ch) ||
			is_combining_char(ch) || is_extender(ch);
}
||
94 | |||
95 | /** |
||
96 | * Test whether the name is a valid one according XML 1.0 standard. |
||
97 | * For the standard please refer: |
||
98 | * |
||
99 | * http://www.w3.org/TR/2004/REC-xml-20040204/ |
||
100 | * |
||
101 | * \param name The name need to be tested |
||
102 | * \return true if ::name is valid, false otherwise. |
||
103 | */ |
||
104 | bool _dom_validate_name(dom_string *name) |
||
105 | { |
||
106 | uint32_t ch; |
||
107 | size_t clen, slen; |
||
108 | parserutils_error err; |
||
109 | const uint8_t *s; |
||
110 | |||
111 | if (name == NULL) |
||
112 | return false; |
||
113 | |||
114 | slen = dom_string_length(name); |
||
115 | if (slen == 0) |
||
116 | return false; |
||
117 | |||
118 | s = (const uint8_t *) dom_string_data(name); |
||
119 | slen = dom_string_byte_length(name); |
||
120 | |||
121 | err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen); |
||
122 | if (err != PARSERUTILS_OK) { |
||
123 | return false; |
||
124 | } |
||
125 | |||
126 | if (is_first_char(ch) == false) |
||
127 | return false; |
||
128 | |||
129 | s += clen; |
||
130 | slen -= clen; |
||
131 | |||
132 | while (slen > 0) { |
||
133 | err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen); |
||
134 | if (err != PARSERUTILS_OK) { |
||
135 | return false; |
||
136 | } |
||
137 | |||
138 | if (is_name_char(ch) == false) |
||
139 | return false; |
||
140 | |||
141 | s += clen; |
||
142 | slen -= clen; |
||
143 | } |
||
144 | |||
145 | return true; |
||
146 | } |
||
147 | |||
148 | /** |
||
149 | * Validate whether the string is a legal NCName. |
||
150 | * Refer http://www.w3.org/TR/REC-xml-names/ for detail. |
||
151 | * |
||
152 | * \param str The name to validate |
||
153 | * \return true if ::name is valid, false otherwise. |
||
154 | */ |
||
155 | bool _dom_validate_ncname(dom_string *name) |
||
156 | { |
||
157 | uint32_t ch; |
||
158 | size_t clen, slen; |
||
159 | parserutils_error err; |
||
160 | const uint8_t *s; |
||
161 | |||
162 | if (name == NULL) |
||
163 | return false; |
||
164 | |||
165 | slen = dom_string_length(name); |
||
166 | if (slen == 0) |
||
167 | return false; |
||
168 | |||
169 | s = (const uint8_t *) dom_string_data(name); |
||
170 | slen = dom_string_byte_length(name); |
||
171 | |||
172 | err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen); |
||
173 | if (err != PARSERUTILS_OK) { |
||
174 | return false; |
||
175 | } |
||
176 | |||
177 | if (is_letter(ch) == false && ch != (uint32_t) '_') |
||
178 | return false; |
||
179 | |||
180 | s += clen; |
||
181 | slen -= clen; |
||
182 | |||
183 | while (slen > 0) { |
||
184 | err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen); |
||
185 | if (err != PARSERUTILS_OK) { |
||
186 | return false; |
||
187 | } |
||
188 | |||
189 | if (is_name_char(ch) == false) |
||
190 | return false; |
||
191 | |||
192 | if (ch == (uint32_t) ':') |
||
193 | return false; |
||
194 | |||
195 | s += clen; |
||
196 | slen -= clen; |
||
197 | } |
||
198 | |||
199 | return true; |
||
200 | }=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=> |
||
201 |