Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4680 right-hear 1
#include "fitz.h"
2
#include "muxps.h"
3
 
4
/* One name="value" pair of an element, kept in a singly linked list. */
struct attribute
{
	char name[40];		/* NUL-terminated attribute name (truncated to fit) */
	char *value;		/* heap-allocated decoded value; NULL until one is attached */
	struct attribute *next;	/* next attribute of the same element, or NULL */
};
10
 
11
/* A node of the parsed document tree. */
struct element
{
	char name[40];			/* NUL-terminated tag name (truncated to fit) */
	struct attribute *atts;		/* attribute list, or NULL */
	struct element *up;		/* parent node */
	struct element *down;		/* first child, or NULL */
	struct element *next;		/* following sibling, or NULL */
};
17
 
18
/* Parser state: the element that newly opened tags are attached to. */
struct parser
{
	struct element *head;
};
22
 
23
/* Write n spaces to stdout; a zero or negative n writes nothing. */
static inline void indent(int n)
{
	/* "> 0" rather than a bare truth test so a negative level cannot run away */
	while (n-- > 0)
		putchar(' ');
}
27
 
28
void xml_print_element(struct element *item, int level)
29
{
30
	while (item) {
31
		struct attribute *att;
32
		indent(level);
33
		printf("<%s", item->name);
34
		for (att = item->atts; att; att = att->next)
35
			printf(" %s=\"%s\"", att->name, att->value);
36
		if (item->down) {
37
			printf(">\n");
38
			xml_print_element(item->down, level + 1);
39
			indent(level);
40
			printf("\n", item->name);
41
		}
42
		else {
43
			printf("/>\n");
44
		}
45
		item = item->next;
46
	}
47
}
48
 
49
struct element *xml_next(struct element *item)
50
{
51
	return item->next;
52
}
53
 
54
struct element *xml_down(struct element *item)
55
{
56
	return item->down;
57
}
58
 
59
char *xml_tag(struct element *item)
60
{
61
	return item->name;
62
}
63
 
64
char *xml_att(struct element *item, const char *name)
65
{
66
	struct attribute *att;
67
	for (att = item->atts; att; att = att->next)
68
		if (!strcmp(att->name, name))
69
			return att->value;
70
	return NULL;
71
}
72
 
73
static void xml_free_attribute(struct attribute *att)
74
{
75
	while (att) {
76
		struct attribute *next = att->next;
77
		if (att->value)
78
			fz_free(att->value);
79
		fz_free(att);
80
		att = next;
81
	}
82
}
83
 
84
void xml_free_element(struct element *item)
85
{
86
	while (item) {
87
		struct element *next = item->next;
88
		if (item->atts)
89
			xml_free_attribute(item->atts);
90
		if (item->down)
91
			xml_free_element(item->down);
92
		fz_free(item);
93
		item = next;
94
	}
95
}
96
 
97
/*
 * Decode one entity or character reference starting at a (a[0] is '&').
 *
 * Recognises &lt; &gt; &amp; &apos; &quot; plus decimal (&#NN;) and
 * hexadecimal (&#xHH;) character references. The decoded code point is
 * stored in *c and the number of input bytes consumed is returned.
 *
 * Anything that is not a well-formed reference is treated as a literal
 * character: *c receives a[0] and 1 is returned.
 *
 * The input must be NUL-terminated; the short-circuit comparisons never
 * read past the terminator.
 */
static int xml_parse_entity(int *c, char *a)
{
	char *b;

	if (a[1] == '#') {
		int hex = (a[2] == 'x');
		char *digits = a + (hex ? 3 : 2);
		int first = (unsigned char)digits[0];
		int ok;

		/*
		 * strtol on its own would also accept leading whitespace, a
		 * sign, an empty digit string and (in base 16) a "0x" prefix.
		 * None of those are valid here, so insist on a real digit.
		 */
		ok = (first >= '0' && first <= '9');
		if (hex) {
			if ((first >= 'a' && first <= 'f') || (first >= 'A' && first <= 'F'))
				ok = 1;
			if (first == '0' && (digits[1] == 'x' || digits[1] == 'X'))
				ok = 0;
		}

		if (ok) {
			long value = strtol(digits, &b, hex ? 16 : 10);
			if (*b == ';') {
				*c = (int)value;
				return (int)(b - a) + 1;
			}
		}
	}
	else if (a[1] == 'l' && a[2] == 't' && a[3] == ';') {
		*c = '<';
		return 4;
	}
	else if (a[1] == 'g' && a[2] == 't' && a[3] == ';') {
		*c = '>';
		return 4;
	}
	else if (a[1] == 'a' && a[2] == 'm' && a[3] == 'p' && a[4] == ';') {
		*c = '&';
		return 5;
	}
	else if (a[1] == 'a' && a[2] == 'p' && a[3] == 'o' && a[4] == 's' && a[5] == ';') {
		*c = '\'';
		return 6;
	}
	else if (a[1] == 'q' && a[2] == 'u' && a[3] == 'o' && a[4] == 't' && a[5] == ';') {
		*c = '"';
		return 6;
	}

	/* not a reference we understand: pass the character through unchanged */
	*c = *a;
	return 1;
}
131
 
132
static void xml_emit_open_tag(struct parser *parser, char *a, char *b)
133
{
134
	struct element *head, *tail;
135
 
136
	head = fz_malloc(sizeof(struct element));
137
	if (b - a > sizeof(head->name))
138
		b = a + sizeof(head->name);
139
	memcpy(head->name, a, b - a);
140
	head->name[b - a] = 0;
141
 
142
	head->atts = NULL;
143
	head->up = parser->head;
144
	head->down = NULL;
145
	head->next = NULL;
146
 
147
	if (!parser->head->down) {
148
		parser->head->down = head;
149
	}
150
	else {
151
		tail = parser->head->down;
152
		while (tail->next)
153
			tail = tail->next;
154
		tail->next = head;
155
	}
156
 
157
	parser->head = head;
158
}
159
 
160
static void xml_emit_att_name(struct parser *parser, char *a, char *b)
161
{
162
	struct element *head = parser->head;
163
	struct attribute *att;
164
 
165
	att = fz_malloc(sizeof(struct attribute));
166
	if (b - a > sizeof(att->name))
167
		b = a + sizeof(att->name);
168
	memcpy(att->name, a, b - a);
169
	att->name[b - a] = 0;
170
	att->value = NULL;
171
	att->next = head->atts;
172
	head->atts = att;
173
}
174
 
175
static void xml_emit_att_value(struct parser *parser, char *a, char *b)
176
{
177
	struct element *head = parser->head;
178
	struct attribute *att = head->atts;
179
	char *s;
180
	int c;
181
 
182
	/* entities are all longer than UTFmax so runetochar is safe */
183
	s = att->value = fz_malloc(b - a + 1);
184
	while (a < b) {
185
		if (*a == '&') {
186
			a += xml_parse_entity(&c, a);
187
			s += runetochar(s, &c);
188
		}
189
		else {
190
			*s++ = *a++;
191
		}
192
	}
193
	*s = 0;
194
}
195
 
196
static void xml_emit_close_tag(struct parser *parser)
197
{
198
	if (parser->head->up)
199
		parser->head = parser->head->up;
200
}
201
 
202
/*
 * Non-zero if c may appear in a tag or attribute name:
 * ASCII letters, digits, and the punctuation . - _ :
 */
static inline int isname(int c)
{
	return c == '.' || c == '-' || c == '_' || c == ':' ||
		(c >= '0' && c <= '9') ||
		(c >= 'A' && c <= 'Z') ||
		(c >= 'a' && c <= 'z');
}
209
 
210
/* Non-zero if c is a space, carriage return, line feed or tab. */
static inline int iswhite(int c)
{
	return c == ' ' || c == '\r' || c == '\n' || c == '\t';
}
214
 
215
/*
 * Parse the NUL-terminated text at p, building the element tree under
 * the parser's current head.
 *
 * The function is a goto-driven state machine, one label per syntactic
 * context. Character data between tags, comments, CDATA sections and
 * processing instructions are skipped; only elements and their
 * attributes are recorded.
 *
 * Returns NULL when the end of input is reached while scanning text,
 * or a static error message describing the first syntax error.
 */
static char *xml_parse_document_imp(struct parser *x, char *p)
{
	char *mark;
	int quote;

parse_text:
	/* skip character data up to the next tag */
	mark = p;
	while (*p && *p != '<') ++p;
	if (*p == '<') { ++p; goto parse_element; }
	return NULL;

parse_element:
	/* p is just past '<': decide what kind of markup this is */
	if (*p == '/') { ++p; goto parse_closing_element; }
	if (*p == '!') { ++p; goto parse_comment; }
	if (*p == '?') { ++p; goto parse_processing_instruction; }
	while (iswhite(*p)) ++p;
	if (isname(*p))
		goto parse_element_name;
	return "syntax error in element";

parse_comment:
	/* p is just past "<!": either "[CDATA[" or "--" must follow */
	if (*p == '[') goto parse_cdata;
	if (*p++ != '-') return "syntax error in comment (<! not followed by --)";
	if (*p++ != '-') return "syntax error in comment (<! not followed by --)";
	mark = p;
	while (*p) {
		if (p[0] == '-' && p[1] == '-' && p[2] == '>') {
			p += 3;
			goto parse_text;
		}
		++p;
	}
	return "end of data in comment";

parse_cdata:
	/* p points at the '[' of "[CDATA["; the || chain stops at a NUL */
	if (p[1] != 'C' || p[2] != 'D' || p[3] != 'A' || p[4] != 'T' || p[5] != 'A' || p[6] != '[')
		return "syntax error in CDATA section";
	p += 7;
	mark = p;
	while (*p) {
		if (p[0] == ']' && p[1] == ']' && p[2] == '>') {
			p += 3;
			goto parse_text;
		}
		++p;
	}
	return "end of data in CDATA section";

parse_processing_instruction:
	/* skip everything up to and including "?>" */
	while (*p) {
		if (p[0] == '?' && p[1] == '>') {
			p += 2;
			goto parse_text;
		}
		++p;
	}
	return "end of data in processing instruction";

parse_closing_element:
	/* the name in "</name>" is scanned but not checked against the open tag */
	while (iswhite(*p)) ++p;
	mark = p;
	while (isname(*p)) ++p;
	while (iswhite(*p)) ++p;
	if (*p != '>')
		return "syntax error in closing element";
	xml_emit_close_tag(x);
	++p;
	goto parse_text;

parse_element_name:
	mark = p;
	while (isname(*p)) ++p;
	xml_emit_open_tag(x, mark, p);
	if (*p == '>') { ++p; goto parse_text; }
	if (p[0] == '/' && p[1] == '>') {
		/* self-closing element: open and close in one go */
		xml_emit_close_tag(x);
		p += 2;
		goto parse_text;
	}
	if (iswhite(*p))
		goto parse_attributes;
	return "syntax error after element name";

parse_attributes:
	while (iswhite(*p)) ++p;
	if (isname(*p))
		goto parse_attribute_name;
	if (*p == '>') { ++p; goto parse_text; }
	if (p[0] == '/' && p[1] == '>') {
		xml_emit_close_tag(x);
		p += 2;
		goto parse_text;
	}
	return "syntax error in attributes";

parse_attribute_name:
	mark = p;
	while (isname(*p)) ++p;
	xml_emit_att_name(x, mark, p);
	while (iswhite(*p)) ++p;
	if (*p == '=') { ++p; goto parse_attribute_value; }
	return "syntax error after attribute name";

parse_attribute_value:
	/* value must be wrapped in matching single or double quotes */
	while (iswhite(*p)) ++p;
	quote = *p++;
	if (quote != '"' && quote != '\'')
		return "missing quote character";
	mark = p;
	while (*p && *p != quote) ++p;
	if (*p == quote) {
		/* hand over [mark, p), then step past the closing quote */
		xml_emit_att_value(x, mark, p++);
		goto parse_attributes;
	}
	return "end of data in attribute value";
}
331
 
332
/*
 * If the n-byte buffer s starts with a UTF-16 byte order mark, transcode
 * it into a newly allocated NUL-terminated string via runetochar and
 * return that; the caller must release it with fz_free.
 * Otherwise return s itself, unmodified and uncopied.
 *
 * Each 16-bit unit is converted on its own (surrogate pairs are not
 * combined), and the byte order mark is converted along with the rest.
 */
static char *convert_to_utf8(unsigned char *s, int n)
{
	unsigned char *e = s + n;
	char *dst, *d;
	int big_endian;
	int c;

	/* a byte order mark needs two bytes; never peek at s[1] otherwise */
	if (n >= 2 && s[0] == 0xFE && s[1] == 0xFF)
		big_endian = 1;
	else if (n >= 2 && s[0] == 0xFF && s[1] == 0xFE)
		big_endian = 0;
	else
		return (char*)s;

	/*
	 * NOTE(review): n * 2 bytes is enough provided runetochar emits at
	 * most 3 bytes per 16-bit unit (3 * n/2 + 1 <= 2n for n >= 2).
	 */
	dst = d = fz_malloc(n * 2);
	while (s + 1 < e) {
		c = big_endian ? (s[0] << 8 | s[1]) : (s[0] | s[1] << 8);
		d += runetochar(d, &c);
		s += 2;
	}
	*d = 0;
	return dst;
}
362
 
363
struct element *
364
xml_parse_document(unsigned char *s, int n)
365
{
366
	struct parser parser;
367
	struct element root;
368
	char *p, *error;
369
 
370
	/* s is already null-terminated (see xps_new_part) */
371
 
372
	memset(&root, 0, sizeof(root));
373
	parser.head = &root;
374
 
375
	p = convert_to_utf8(s, n);
376
 
377
	error = xml_parse_document_imp(&parser, p);
378
	if (error) {
379
		fz_throw(error);
380
		return NULL;
381
	}
382
 
383
	if (p != (char*)s)
384
		fz_free(p);
385
 
386
	return root.down;
387
}