Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
3584 | sourcerer | 1 | #define _GNU_SOURCE |
2 | |||
3 | #include |
||
4 | #include |
||
5 | #include |
||
6 | #include |
||
7 | #include |
||
8 | |||
9 | #include |
||
10 | #include |
||
11 | #include |
||
12 | #include |
||
13 | |||
14 | #include |
||
15 | #include |
||
16 | #include |
||
17 | |||
/**
 * Mark a parameter or variable as deliberately unused.
 *
 * Evaluates \a x once and discards the value.  Unlike a self-assignment,
 * the cast to void also works for const-qualified objects and does not
 * trigger self-assignment warnings.
 */
#define UNUSED(x) ((void) (x))
||
19 | |||
/* Short type names for the structures defined below. */
typedef struct attr_t attr_t;	/* one element attribute */
typedef struct node_t node_t;	/* one tree node */
typedef struct buf_t buf_t;	/* character buffer with a position */
||
23 | |||
24 | struct attr_t { |
||
25 | hubbub_ns ns; |
||
26 | char *name; |
||
27 | char *value; |
||
28 | }; |
||
29 | |||
30 | struct node_t { |
||
31 | enum { DOCTYPE, COMMENT, ELEMENT, CHARACTER } type; |
||
32 | |||
33 | union { |
||
34 | struct { |
||
35 | char *name; |
||
36 | char *public_id; |
||
37 | char *system_id; |
||
38 | } doctype; |
||
39 | |||
40 | struct { |
||
41 | hubbub_ns ns; |
||
42 | char *name; |
||
43 | attr_t *attrs; |
||
44 | size_t n_attrs; |
||
45 | } element; |
||
46 | |||
47 | char *content; /**< For comments, characters **/ |
||
48 | } data; |
||
49 | |||
50 | node_t *next; |
||
51 | node_t *prev; |
||
52 | |||
53 | node_t *child; |
||
54 | node_t *parent; |
||
55 | }; |
||
56 | |||
/**
 * A character buffer together with a length and a position.
 *
 * NOTE(review): nothing in the visible part of this file uses this type;
 * the exact meaning of \c len and \c pos should be confirmed against its
 * users.
 */
struct buf_t {
	char *buf;	/* buffer storage */
	size_t len;	/* length associated with buf */
	size_t pos;	/* position within buf */
};
||
62 | |||
63 | |||
/* Number of entries in ns_names; namespace values are asserted to be
 * smaller than this before they are stored in a node. */
#define NUM_NAMESPACES 7

/**
 * Printable prefix for each namespace index.
 *
 * The first two entries carry no prefix (the second is HTML).  Both the
 * strings and the table itself are read-only: the declaration previously
 * repeated \c const on the character type instead of qualifying the
 * pointers.
 */
const char *const ns_names[NUM_NAMESPACES] =
		{ NULL, NULL /*html*/, "math", "svg", "xlink", "xml", "xmlns" };
||
67 | |||
68 | |||
69 | node_t *Document; |
||
70 | |||
71 | |||
72 | |||
73 | static int create_comment(void *ctx, const hubbub_string *data, void **result); |
||
74 | static int create_doctype(void *ctx, const hubbub_doctype *doctype, |
||
75 | void **result); |
||
76 | static int create_element(void *ctx, const hubbub_tag *tag, void **result); |
||
77 | static int create_text(void *ctx, const hubbub_string *data, void **result); |
||
78 | static int ref_node(void *ctx, void *node); |
||
79 | static int unref_node(void *ctx, void *node); |
||
80 | static int append_child(void *ctx, void *parent, void *child, void **result); |
||
81 | static int insert_before(void *ctx, void *parent, void *child, void *ref_child, |
||
82 | void **result); |
||
83 | static int remove_child(void *ctx, void *parent, void *child, void **result); |
||
84 | static int clone_node(void *ctx, void *node, bool deep, void **result); |
||
85 | static int reparent_children(void *ctx, void *node, void *new_parent); |
||
86 | static int get_parent(void *ctx, void *node, bool element_only, void **result); |
||
87 | static int has_children(void *ctx, void *node, bool *result); |
||
88 | static int form_associate(void *ctx, void *form, void *node); |
||
89 | static int add_attributes(void *ctx, void *node, |
||
90 | const hubbub_attribute *attributes, uint32_t n_attributes); |
||
91 | static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode); |
||
92 | |||
93 | static hubbub_tree_handler tree_handler = { |
||
94 | create_comment, |
||
95 | create_doctype, |
||
96 | create_element, |
||
97 | create_text, |
||
98 | ref_node, |
||
99 | unref_node, |
||
100 | append_child, |
||
101 | insert_before, |
||
102 | remove_child, |
||
103 | clone_node, |
||
104 | reparent_children, |
||
105 | get_parent, |
||
106 | has_children, |
||
107 | form_associate, |
||
108 | add_attributes, |
||
109 | set_quirks_mode, |
||
110 | NULL, |
||
111 | NULL |
||
112 | }; |
||
113 | |||
/**
 * Allocator callback passed to hubbub.
 *
 * Behaves like realloc(), except that a request for zero bytes is handled
 * explicitly: the block is freed and NULL is returned.  Calling
 * realloc(ptr, 0) directly is implementation-defined, so the result would
 * otherwise differ between C libraries.
 * NOTE(review): presumably hubbub expects "size 0 frees" semantics from
 * its allocator -- confirm against the hubbub allocator documentation.
 *
 * \param ptr  Block to resize, or NULL to allocate a new one
 * \param len  Requested size in bytes; 0 releases \a ptr
 * \param pw   Client data (unused)
 * \return Pointer to the (re)allocated block, or NULL on failure or when
 *         \a len is 0
 */
static void *myrealloc(void *ptr, size_t len, void *pw)
{
	(void) pw;

	if (len == 0) {
		free(ptr);
		return NULL;
	}

	return realloc(ptr, len);
}
||
120 | |||
121 | |||
122 | |||
123 | int main(int argc, char **argv) |
||
124 | { |
||
125 | hubbub_parser *parser; |
||
126 | hubbub_parser_optparams params; |
||
127 | |||
128 | struct stat info; |
||
129 | int fd; |
||
130 | uint8_t *file; |
||
131 | |||
132 | if (argc != 3) { |
||
133 | printf("Usage: %s |
||
134 | return 1; |
||
135 | } |
||
136 | |||
137 | /* Initialise library */ |
||
138 | assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK); |
||
139 | |||
140 | assert(hubbub_parser_create("UTF-8", false, myrealloc, NULL, &parser) == |
||
141 | HUBBUB_OK); |
||
142 | |||
143 | params.tree_handler = &tree_handler; |
||
144 | assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TREE_HANDLER, |
||
145 | ¶ms) == HUBBUB_OK); |
||
146 | |||
147 | params.document_node = (void *)1; |
||
148 | assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_DOCUMENT_NODE, |
||
149 | ¶ms) == HUBBUB_OK); |
||
150 | |||
151 | stat(argv[2], &info); |
||
152 | fd = open(argv[2], 0); |
||
153 | file = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0); |
||
154 | |||
155 | assert(hubbub_parser_parse_chunk(parser, file, info.st_size) |
||
156 | == HUBBUB_OK); |
||
157 | |||
158 | assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK); |
||
159 | |||
160 | return 0; |
||
161 | } |
||
162 | |||
163 | |||
164 | /*** Tree construction functions ***/ |
||
165 | |||
166 | int create_comment(void *ctx, const hubbub_string *data, void **result) |
||
167 | { |
||
168 | node_t *node = calloc(1, sizeof *node); |
||
169 | |||
170 | UNUSED(ctx); |
||
171 | |||
172 | node->type = COMMENT; |
||
173 | node->data.content = strndup((const char *) data->ptr, data->len); |
||
174 | |||
175 | *result = node; |
||
176 | |||
177 | return 0; |
||
178 | } |
||
179 | |||
180 | int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result) |
||
181 | { |
||
182 | node_t *node = calloc(1, sizeof *node); |
||
183 | |||
184 | UNUSED(ctx); |
||
185 | |||
186 | node->type = DOCTYPE; |
||
187 | node->data.doctype.name = strndup( |
||
188 | (const char *) doctype->name.ptr, |
||
189 | doctype->name.len); |
||
190 | |||
191 | if (!doctype->public_missing) { |
||
192 | node->data.doctype.public_id = strndup( |
||
193 | (const char *) doctype->public_id.ptr, |
||
194 | doctype->public_id.len); |
||
195 | } |
||
196 | |||
197 | if (!doctype->system_missing) { |
||
198 | node->data.doctype.system_id = strndup( |
||
199 | (const char *) doctype->system_id.ptr, |
||
200 | doctype->system_id.len); |
||
201 | } |
||
202 | |||
203 | *result = node; |
||
204 | |||
205 | return 0; |
||
206 | } |
||
207 | |||
208 | int create_element(void *ctx, const hubbub_tag *tag, void **result) |
||
209 | { |
||
210 | node_t *node = calloc(1, sizeof *node); |
||
211 | |||
212 | UNUSED(ctx); |
||
213 | |||
214 | assert(tag->ns < NUM_NAMESPACES); |
||
215 | |||
216 | node->type = ELEMENT; |
||
217 | node->data.element.ns = tag->ns; |
||
218 | node->data.element.name = strndup( |
||
219 | (const char *) tag->name.ptr, |
||
220 | tag->name.len); |
||
221 | node->data.element.n_attrs = tag->n_attributes; |
||
222 | |||
223 | node->data.element.attrs = calloc(node->data.element.n_attrs, |
||
224 | sizeof *node->data.element.attrs); |
||
225 | |||
226 | for (size_t i = 0; i < tag->n_attributes; i++) { |
||
227 | attr_t *attr = &node->data.element.attrs[i]; |
||
228 | |||
229 | assert(tag->attributes[i].ns < NUM_NAMESPACES); |
||
230 | |||
231 | attr->ns = tag->attributes[i].ns; |
||
232 | |||
233 | attr->name = strndup( |
||
234 | (const char *) tag->attributes[i].name.ptr, |
||
235 | tag->attributes[i].name.len); |
||
236 | |||
237 | attr->value = strndup( |
||
238 | (const char *) tag->attributes[i].value.ptr, |
||
239 | tag->attributes[i].value.len); |
||
240 | } |
||
241 | |||
242 | *result = node; |
||
243 | |||
244 | return 0; |
||
245 | } |
||
246 | |||
247 | int create_text(void *ctx, const hubbub_string *data, void **result) |
||
248 | { |
||
249 | node_t *node = calloc(1, sizeof *node); |
||
250 | |||
251 | UNUSED(ctx); |
||
252 | |||
253 | node->type = CHARACTER; |
||
254 | node->data.content = strndup((const char *) data->ptr, data->len); |
||
255 | |||
256 | *result = node; |
||
257 | |||
258 | return 0; |
||
259 | } |
||
260 | |||
/**
 * Take a reference on a node.  Nodes are not reference counted in this
 * file, so nothing is done.
 *
 * \param ctx   Client context (unused)
 * \param node  Node being referenced (unused)
 * \return always 0
 */
int ref_node(void *ctx, void *node)
{
	(void) ctx;
	(void) node;

	return 0;
}
||
268 | |||
/**
 * Release a reference on a node.  Nodes are not reference counted in this
 * file, so nothing is done and nothing is freed.
 *
 * \param ctx   Client context (unused)
 * \param node  Node being released (unused)
 * \return always 0
 */
int unref_node(void *ctx, void *node)
{
	(void) ctx;
	(void) node;

	return 0;
}
||
276 | |||
277 | int append_child(void *ctx, void *parent, void *child, void **result) |
||
278 | { |
||
279 | node_t *tparent = parent; |
||
280 | node_t *tchild = child; |
||
281 | |||
282 | UNUSED(ctx); |
||
283 | |||
284 | node_t *insert = NULL; |
||
285 | |||
286 | tchild->parent = tparent; |
||
287 | tchild->next = tchild->prev = NULL; |
||
288 | |||
289 | *result = child; |
||
290 | |||
291 | if (parent == (void *)1) { |
||
292 | if (Document) { |
||
293 | insert = Document; |
||
294 | } else { |
||
295 | Document = tchild; |
||
296 | } |
||
297 | } else { |
||
298 | if (tparent->child == NULL) { |
||
299 | tparent->child = tchild; |
||
300 | } else { |
||
301 | insert = tparent->child; |
||
302 | } |
||
303 | } |
||
304 | |||
305 | if (insert) { |
||
306 | while (insert->next != NULL) { |
||
307 | insert = insert->next; |
||
308 | } |
||
309 | |||
310 | if (tchild->type == CHARACTER && insert->type == CHARACTER) { |
||
311 | insert->data.content = realloc(insert->data.content, |
||
312 | strlen(insert->data.content) + |
||
313 | strlen(tchild->data.content) + 1); |
||
314 | strcat(insert->data.content, tchild->data.content); |
||
315 | *result = insert; |
||
316 | } else { |
||
317 | insert->next = tchild; |
||
318 | tchild->prev = insert; |
||
319 | } |
||
320 | } |
||
321 | |||
322 | return 0; |
||
323 | } |
||
324 | |||
325 | /* insert 'child' before 'ref_child', under 'parent' */ |
||
326 | int insert_before(void *ctx, void *parent, void *child, void *ref_child, |
||
327 | void **result) |
||
328 | { |
||
329 | node_t *tparent = parent; |
||
330 | node_t *tchild = child; |
||
331 | node_t *tref = ref_child; |
||
332 | |||
333 | UNUSED(ctx); |
||
334 | |||
335 | if (tchild->type == CHARACTER && tref->prev && |
||
336 | tref->prev->type == CHARACTER) { |
||
337 | node_t *insert = tref->prev; |
||
338 | |||
339 | insert->data.content = realloc(insert->data.content, |
||
340 | strlen(insert->data.content) + |
||
341 | strlen(tchild->data.content) + 1); |
||
342 | strcat(insert->data.content, tchild->data.content); |
||
343 | |||
344 | *result = insert; |
||
345 | } else { |
||
346 | tchild->parent = parent; |
||
347 | |||
348 | tchild->prev = tref->prev; |
||
349 | tchild->next = tref; |
||
350 | tref->prev = tchild; |
||
351 | |||
352 | if (tchild->prev) |
||
353 | tchild->prev->next = tchild; |
||
354 | else |
||
355 | tparent->child = tchild; |
||
356 | |||
357 | *result = child; |
||
358 | } |
||
359 | |||
360 | return 0; |
||
361 | } |
||
362 | |||
363 | int remove_child(void *ctx, void *parent, void *child, void **result) |
||
364 | { |
||
365 | node_t *tparent = parent; |
||
366 | node_t *tchild = child; |
||
367 | |||
368 | UNUSED(ctx); |
||
369 | |||
370 | assert(tparent->child); |
||
371 | assert(tchild->parent == tparent); |
||
372 | |||
373 | if (tchild->parent->child == tchild) { |
||
374 | tchild->parent->child = tchild->next; |
||
375 | } |
||
376 | |||
377 | if (tchild->prev) |
||
378 | tchild->prev->next = tchild->next; |
||
379 | |||
380 | if (tchild->next) |
||
381 | tchild->next->prev = tchild->prev; |
||
382 | |||
383 | /* now reset all the child's pointers */ |
||
384 | tchild->next = tchild->prev = tchild->parent = NULL; |
||
385 | |||
386 | *result = child; |
||
387 | |||
388 | return 0; |
||
389 | } |
||
390 | |||
391 | int clone_node(void *ctx, void *node, bool deep, void **result) |
||
392 | { |
||
393 | node_t *old_node = node; |
||
394 | node_t *new_node = calloc(1, sizeof *new_node); |
||
395 | |||
396 | UNUSED(ctx); |
||
397 | |||
398 | *new_node = *old_node; |
||
399 | *result = new_node; |
||
400 | |||
401 | new_node->child = new_node->parent = |
||
402 | new_node->next = new_node->prev = |
||
403 | NULL; |
||
404 | |||
405 | if (deep == false) |
||
406 | return 0; |
||
407 | |||
408 | if (old_node->next) { |
||
409 | void *n; |
||
410 | |||
411 | clone_node(ctx, old_node->next, true, &n); |
||
412 | |||
413 | new_node->next = n; |
||
414 | new_node->next->prev = new_node; |
||
415 | } |
||
416 | |||
417 | if (old_node->child) { |
||
418 | void *n; |
||
419 | |||
420 | clone_node(ctx, old_node->child, true, &n); |
||
421 | |||
422 | new_node->child = n; |
||
423 | new_node->child->parent = new_node; |
||
424 | } |
||
425 | |||
426 | return 0; |
||
427 | } |
||
428 | |||
429 | /* Take all of the child nodes of "node" and append them to "new_parent" */ |
||
430 | int reparent_children(void *ctx, void *node, void *new_parent) |
||
431 | { |
||
432 | node_t *parent = new_parent; |
||
433 | node_t *old_parent = node; |
||
434 | |||
435 | node_t *insert; |
||
436 | node_t *kids; |
||
437 | |||
438 | UNUSED(ctx); |
||
439 | |||
440 | kids = old_parent->child; |
||
441 | if (!kids) return 0; |
||
442 | |||
443 | old_parent->child = NULL; |
||
444 | |||
445 | insert = parent->child; |
||
446 | if (!insert) { |
||
447 | parent->child = kids; |
||
448 | } else { |
||
449 | while (insert->next != NULL) { |
||
450 | insert = insert->next; |
||
451 | } |
||
452 | |||
453 | insert->next = kids; |
||
454 | kids->prev = insert; |
||
455 | } |
||
456 | |||
457 | while (kids) { |
||
458 | kids->parent = parent; |
||
459 | kids = kids->next; |
||
460 | } |
||
461 | |||
462 | return 0; |
||
463 | } |
||
464 | |||
465 | int get_parent(void *ctx, void *node, bool element_only, void **result) |
||
466 | { |
||
467 | UNUSED(ctx); |
||
468 | UNUSED(element_only); |
||
469 | |||
470 | *result = ((node_t *)node)->parent; |
||
471 | |||
472 | return 0; |
||
473 | } |
||
474 | |||
475 | int has_children(void *ctx, void *node, bool *result) |
||
476 | { |
||
477 | UNUSED(ctx); |
||
478 | |||
479 | *result = ((node_t *)node)->child ? true : false; |
||
480 | |||
481 | return 0; |
||
482 | } |
||
483 | |||
/**
 * Associate a node with a form.  Form association is not tracked in this
 * file, so nothing is done.
 *
 * \param ctx   Client context (unused)
 * \param form  Form node (unused)
 * \param node  Node to associate (unused)
 * \return always 0
 */
int form_associate(void *ctx, void *form, void *node)
{
	(void) ctx;
	(void) form;
	(void) node;

	return 0;
}
||
492 | |||
493 | int add_attributes(void *ctx, void *vnode, |
||
494 | const hubbub_attribute *attributes, uint32_t n_attributes) |
||
495 | { |
||
496 | node_t *node = vnode; |
||
497 | size_t old_elems = node->data.element.n_attrs; |
||
498 | |||
499 | UNUSED(ctx); |
||
500 | |||
501 | node->data.element.n_attrs += n_attributes; |
||
502 | |||
503 | node->data.element.attrs = realloc(node->data.element.attrs, |
||
504 | node->data.element.n_attrs * |
||
505 | sizeof *node->data.element.attrs); |
||
506 | |||
507 | for (size_t i = 0; i < n_attributes; i++) { |
||
508 | attr_t *attr = &node->data.element.attrs[old_elems + i]; |
||
509 | |||
510 | assert(attributes[i].ns < NUM_NAMESPACES); |
||
511 | |||
512 | attr->ns = attributes[i].ns; |
||
513 | |||
514 | attr->name = strndup( |
||
515 | (const char *) attributes[i].name.ptr, |
||
516 | attributes[i].name.len); |
||
517 | |||
518 | attr->value = strndup( |
||
519 | (const char *) attributes[i].value.ptr, |
||
520 | attributes[i].value.len); |
||
521 | } |
||
522 | |||
523 | |||
524 | return 0; |
||
525 | } |
||
526 | |||
527 | int set_quirks_mode(void *ctx, hubbub_quirks_mode mode) |
||
528 | { |
||
529 | UNUSED(ctx); |
||
530 | UNUSED(mode); |
||
531 | |||
532 | return 0; |
||
533 | }>>>>>> |