Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4680 | right-hear | 1 | #include "fitz.h" |
2 | #include "muxps.h" |
||
3 | |||
/* One attribute of an element; attributes of an element form a singly linked list. */
struct attribute
{
	char name[40];			/* NUL-terminated attribute name (fixed-size buffer) */
	char *value;			/* allocated by xml_emit_att_value; NULL until a value is parsed */
	struct attribute *next;		/* next attribute of the same element, or NULL */
};
||
10 | |||
/* One node of the parsed document tree. */
struct element
{
	char name[40];			/* NUL-terminated tag name (fixed-size buffer) */
	struct attribute *atts;		/* head of the attribute list, or NULL */
	struct element *up;		/* parent element */
	struct element *down;		/* first child, or NULL */
	struct element *next;		/* next sibling, or NULL */
};
||
17 | |||
/* Parser state: the element that is currently open while the document is scanned. */
struct parser
{
	struct element *head;
};
||
22 | |||
/* Write n spaces to stdout; a zero or negative n writes nothing. */
static inline void indent(int n)
{
	/* "> 0" keeps a negative count from looping until the counter wraps */
	while (n-- > 0)
		putchar(' ');
}
||
27 | |||
28 | void xml_print_element(struct element *item, int level) |
||
29 | { |
||
30 | while (item) { |
||
31 | struct attribute *att; |
||
32 | indent(level); |
||
33 | printf("<%s", item->name); |
||
34 | for (att = item->atts; att; att = att->next) |
||
35 | printf(" %s=\"%s\"", att->name, att->value); |
||
36 | if (item->down) { |
||
37 | printf(">\n"); |
||
38 | xml_print_element(item->down, level + 1); |
||
39 | indent(level); |
||
40 | printf("%s>\n", item->name); |
||
41 | } |
||
42 | else { |
||
43 | printf("/>\n"); |
||
44 | } |
||
45 | item = item->next; |
||
46 | } |
||
47 | } |
||
48 | |||
49 | struct element *xml_next(struct element *item) |
||
50 | { |
||
51 | return item->next; |
||
52 | } |
||
53 | |||
54 | struct element *xml_down(struct element *item) |
||
55 | { |
||
56 | return item->down; |
||
57 | } |
||
58 | |||
59 | char *xml_tag(struct element *item) |
||
60 | { |
||
61 | return item->name; |
||
62 | } |
||
63 | |||
64 | char *xml_att(struct element *item, const char *name) |
||
65 | { |
||
66 | struct attribute *att; |
||
67 | for (att = item->atts; att; att = att->next) |
||
68 | if (!strcmp(att->name, name)) |
||
69 | return att->value; |
||
70 | return NULL; |
||
71 | } |
||
72 | |||
73 | static void xml_free_attribute(struct attribute *att) |
||
74 | { |
||
75 | while (att) { |
||
76 | struct attribute *next = att->next; |
||
77 | if (att->value) |
||
78 | fz_free(att->value); |
||
79 | fz_free(att); |
||
80 | att = next; |
||
81 | } |
||
82 | } |
||
83 | |||
84 | void xml_free_element(struct element *item) |
||
85 | { |
||
86 | while (item) { |
||
87 | struct element *next = item->next; |
||
88 | if (item->atts) |
||
89 | xml_free_attribute(item->atts); |
||
90 | if (item->down) |
||
91 | xml_free_element(item->down); |
||
92 | fz_free(item); |
||
93 | item = next; |
||
94 | } |
||
95 | } |
||
96 | |||
/*
 * Decode the entity or character reference that starts at a[0] == '&'.
 *
 * Stores the decoded character in *c and returns the number of input bytes
 * consumed. Recognizes decimal (&#N;) and hexadecimal (&#xN;) character
 * references and the five predefined entities &lt; &gt; &amp; &apos; &quot;.
 * Anything else is not treated as an entity: the '&' itself is stored in *c
 * and 1 is returned, so the caller passes it through literally.
 *
 * a must be NUL-terminated; the comparisons stop at the terminator.
 */
static int xml_parse_entity(int *c, char *a)
{
	char *b;

	if (a[1] == '#') {
		char *digits = a + 2;
		int base = 10;
		int hexdigit;

		if (*digits == 'x') {
			base = 16;
			++digits;
		}

		/*
		 * strtol skips leading blanks, accepts a sign, and reports
		 * "no digits" only through the end pointer. Insist on a digit
		 * up front so "&#;", "&#x;" and "&#-5;" are not taken as
		 * character references.
		 */
		hexdigit = base == 16 &&
			((*digits >= 'a' && *digits <= 'f') ||
			 (*digits >= 'A' && *digits <= 'F'));
		if ((*digits >= '0' && *digits <= '9') || hexdigit) {
			long value = strtol(digits, &b, base);
			if (*b == ';') {
				*c = (int)value;
				return (int)(b - a) + 1;
			}
		}
	}
	else if (a[1] == 'l' && a[2] == 't' && a[3] == ';') {
		*c = '<';
		return 4;
	}
	else if (a[1] == 'g' && a[2] == 't' && a[3] == ';') {
		*c = '>';
		return 4;
	}
	else if (a[1] == 'a' && a[2] == 'm' && a[3] == 'p' && a[4] == ';') {
		*c = '&';
		return 5;
	}
	else if (a[1] == 'a' && a[2] == 'p' && a[3] == 'o' && a[4] == 's' && a[5] == ';') {
		*c = '\'';
		return 6;
	}
	else if (a[1] == 'q' && a[2] == 'u' && a[3] == 'o' && a[4] == 't' && a[5] == ';') {
		*c = '"';
		return 6;
	}

	/* not a recognized entity: emit the '&' as an ordinary character */
	*c = *a;
	return 1;
}
||
131 | |||
132 | static void xml_emit_open_tag(struct parser *parser, char *a, char *b) |
||
133 | { |
||
134 | struct element *head, *tail; |
||
135 | |||
136 | head = fz_malloc(sizeof(struct element)); |
||
137 | if (b - a > sizeof(head->name)) |
||
138 | b = a + sizeof(head->name); |
||
139 | memcpy(head->name, a, b - a); |
||
140 | head->name[b - a] = 0; |
||
141 | |||
142 | head->atts = NULL; |
||
143 | head->up = parser->head; |
||
144 | head->down = NULL; |
||
145 | head->next = NULL; |
||
146 | |||
147 | if (!parser->head->down) { |
||
148 | parser->head->down = head; |
||
149 | } |
||
150 | else { |
||
151 | tail = parser->head->down; |
||
152 | while (tail->next) |
||
153 | tail = tail->next; |
||
154 | tail->next = head; |
||
155 | } |
||
156 | |||
157 | parser->head = head; |
||
158 | } |
||
159 | |||
160 | static void xml_emit_att_name(struct parser *parser, char *a, char *b) |
||
161 | { |
||
162 | struct element *head = parser->head; |
||
163 | struct attribute *att; |
||
164 | |||
165 | att = fz_malloc(sizeof(struct attribute)); |
||
166 | if (b - a > sizeof(att->name)) |
||
167 | b = a + sizeof(att->name); |
||
168 | memcpy(att->name, a, b - a); |
||
169 | att->name[b - a] = 0; |
||
170 | att->value = NULL; |
||
171 | att->next = head->atts; |
||
172 | head->atts = att; |
||
173 | } |
||
174 | |||
175 | static void xml_emit_att_value(struct parser *parser, char *a, char *b) |
||
176 | { |
||
177 | struct element *head = parser->head; |
||
178 | struct attribute *att = head->atts; |
||
179 | char *s; |
||
180 | int c; |
||
181 | |||
182 | /* entities are all longer than UTFmax so runetochar is safe */ |
||
183 | s = att->value = fz_malloc(b - a + 1); |
||
184 | while (a < b) { |
||
185 | if (*a == '&') { |
||
186 | a += xml_parse_entity(&c, a); |
||
187 | s += runetochar(s, &c); |
||
188 | } |
||
189 | else { |
||
190 | *s++ = *a++; |
||
191 | } |
||
192 | } |
||
193 | *s = 0; |
||
194 | } |
||
195 | |||
196 | static void xml_emit_close_tag(struct parser *parser) |
||
197 | { |
||
198 | if (parser->head->up) |
||
199 | parser->head = parser->head->up; |
||
200 | } |
||
201 | |||
/*
 * Return non-zero if c may appear in an element or attribute name:
 * ASCII letters, digits, '.', '-', '_' and ':'. Every other value,
 * including 0 and negative values, is rejected.
 */
static inline int isname(int c)
{
	return c == '.' || c == '-' || c == '_' || c == ':' ||
		(c >= '0' && c <= '9') ||
		(c >= 'A' && c <= 'Z') ||
		(c >= 'a' && c <= 'z');
}
||
209 | |||
/* Return non-zero if c is a space, carriage return, line feed or tab. */
static inline int iswhite(int c)
{
	return c == ' ' || c == '\r' || c == '\n' || c == '\t';
}
||
214 | |||
215 | static char *xml_parse_document_imp(struct parser *x, char *p) |
||
216 | { |
||
217 | char *mark; |
||
218 | int quote; |
||
219 | |||
220 | parse_text: |
||
221 | mark = p; |
||
222 | while (*p && *p != '<') ++p; |
||
223 | if (*p == '<') { ++p; goto parse_element; } |
||
224 | return NULL; |
||
225 | |||
226 | parse_element: |
||
227 | if (*p == '/') { ++p; goto parse_closing_element; } |
||
228 | if (*p == '!') { ++p; goto parse_comment; } |
||
229 | if (*p == '?') { ++p; goto parse_processing_instruction; } |
||
230 | while (iswhite(*p)) ++p; |
||
231 | if (isname(*p)) |
||
232 | goto parse_element_name; |
||
233 | return "syntax error in element"; |
||
234 | |||
235 | parse_comment: |
||
236 | if (*p == '[') goto parse_cdata; |
||
237 | if (*p++ != '-') return "syntax error in comment ( |
||
238 | if (*p++ != '-') return "syntax error in comment ( |
||
239 | mark = p; |
||
240 | while (*p) { |
||
241 | if (p[0] == '-' && p[1] == '-' && p[2] == '>') { |
||
242 | p += 3; |
||
243 | goto parse_text; |
||
244 | } |
||
245 | ++p; |
||
246 | } |
||
247 | return "end of data in comment"; |
||
248 | |||
249 | parse_cdata: |
||
250 | if (p[1] != 'C' || p[2] != 'D' || p[3] != 'A' || p[4] != 'T' || p[5] != 'A' || p[6] != '[') |
||
251 | return "syntax error in CDATA section"; |
||
252 | p += 7; |
||
253 | mark = p; |
||
254 | while (*p) { |
||
255 | if (p[0] == ']' && p[1] == ']' && p[2] == '>') { |
||
256 | p += 3; |
||
257 | goto parse_text; |
||
258 | } |
||
259 | ++p; |
||
260 | } |
||
261 | return "end of data in CDATA section"; |
||
262 | |||
263 | parse_processing_instruction: |
||
264 | while (*p) { |
||
265 | if (p[0] == '?' && p[1] == '>') { |
||
266 | p += 2; |
||
267 | goto parse_text; |
||
268 | } |
||
269 | ++p; |
||
270 | } |
||
271 | return "end of data in processing instruction"; |
||
272 | |||
273 | parse_closing_element: |
||
274 | while (iswhite(*p)) ++p; |
||
275 | mark = p; |
||
276 | while (isname(*p)) ++p; |
||
277 | while (iswhite(*p)) ++p; |
||
278 | if (*p != '>') |
||
279 | return "syntax error in closing element"; |
||
280 | xml_emit_close_tag(x); |
||
281 | ++p; |
||
282 | goto parse_text; |
||
283 | |||
284 | parse_element_name: |
||
285 | mark = p; |
||
286 | while (isname(*p)) ++p; |
||
287 | xml_emit_open_tag(x, mark, p); |
||
288 | if (*p == '>') { ++p; goto parse_text; } |
||
289 | if (p[0] == '/' && p[1] == '>') { |
||
290 | xml_emit_close_tag(x); |
||
291 | p += 2; |
||
292 | goto parse_text; |
||
293 | } |
||
294 | if (iswhite(*p)) |
||
295 | goto parse_attributes; |
||
296 | return "syntax error after element name"; |
||
297 | |||
298 | parse_attributes: |
||
299 | while (iswhite(*p)) ++p; |
||
300 | if (isname(*p)) |
||
301 | goto parse_attribute_name; |
||
302 | if (*p == '>') { ++p; goto parse_text; } |
||
303 | if (p[0] == '/' && p[1] == '>') { |
||
304 | xml_emit_close_tag(x); |
||
305 | p += 2; |
||
306 | goto parse_text; |
||
307 | } |
||
308 | return "syntax error in attributes"; |
||
309 | |||
310 | parse_attribute_name: |
||
311 | mark = p; |
||
312 | while (isname(*p)) ++p; |
||
313 | xml_emit_att_name(x, mark, p); |
||
314 | while (iswhite(*p)) ++p; |
||
315 | if (*p == '=') { ++p; goto parse_attribute_value; } |
||
316 | return "syntax error after attribute name"; |
||
317 | |||
318 | parse_attribute_value: |
||
319 | while (iswhite(*p)) ++p; |
||
320 | quote = *p++; |
||
321 | if (quote != '"' && quote != '\'') |
||
322 | return "missing quote character"; |
||
323 | mark = p; |
||
324 | while (*p && *p != quote) ++p; |
||
325 | if (*p == quote) { |
||
326 | xml_emit_att_value(x, mark, p++); |
||
327 | goto parse_attributes; |
||
328 | } |
||
329 | return "end of data in attribute value"; |
||
330 | } |
||
331 | |||
/*
 * Convert the n bytes at s to UTF-8 if they start with a UTF-16 byte order
 * mark (FE FF = big-endian, FF FE = little-endian).
 *
 * Returns a newly allocated, NUL-terminated buffer in that case; otherwise
 * returns s itself, unchanged. The caller tells the two cases apart by
 * comparing the result with s and frees only a new buffer.
 *
 * Each 16-bit unit is converted on its own (the byte order mark included);
 * a trailing odd byte is ignored.
 */
static char *convert_to_utf8(unsigned char *s, int n)
{
	unsigned char *e = s + n;
	char *dst, *d;
	int c;

	/* UTF-16 big-endian */
	if (s[0] == 0xFE && s[1] == 0xFF) {
		dst = d = fz_malloc(n * 2);
		while (s + 1 < e) {
			c = s[0] << 8 | s[1];
			d += runetochar(d, &c);
			s += 2;
		}
		*d = 0;
		return dst;
	}

	/* UTF-16 little-endian */
	if (s[0] == 0xFF && s[1] == 0xFE) {
		dst = d = fz_malloc(n * 2);
		while (s + 1 < e) {
			c = s[0] | s[1] << 8;
			d += runetochar(d, &c);
			s += 2;
		}
		*d = 0;
		return dst;
	}

	return (char*)s;
}
||
362 | |||
363 | struct element * |
||
364 | xml_parse_document(unsigned char *s, int n) |
||
365 | { |
||
366 | struct parser parser; |
||
367 | struct element root; |
||
368 | char *p, *error; |
||
369 | |||
370 | /* s is already null-terminated (see xps_new_part) */ |
||
371 | |||
372 | memset(&root, 0, sizeof(root)); |
||
373 | parser.head = &root; |
||
374 | |||
375 | p = convert_to_utf8(s, n); |
||
376 | |||
377 | error = xml_parse_document_imp(&parser, p); |
||
378 | if (error) { |
||
379 | fz_throw(error); |
||
380 | return NULL; |
||
381 | } |
||
382 | |||
383 | if (p != (char*)s) |
||
384 | fz_free(p); |
||
385 | |||
386 | return root.down; |
||
387 | }><>>><>>!->!>')>')>=>=>=>>'; |