Rev 4364 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
3584 | sourcerer | 1 | /* |
2 | * This file is part of libdom. |
||
3 | * Licensed under the MIT License, |
||
4 | * http://www.opensource.org/licenses/mit-license.php |
||
5 | * Copyright 2007 John-Mark Bell |
||
6 | * Copyright 2009 Bo Yang |
||
7 | * Copyright 2012 Daniel Silverstone |
||
8 | */ |
||
9 | |||
10 | #include |
||
11 | #include |
||
12 | |||
4821 | ashmew2 | 13 | #include "hubbub/errors.h" |
14 | #include "hubbub/hubbub.h" |
||
15 | #include "hubbub/parser.h" |
||
3584 | sourcerer | 16 | |
17 | #include |
||
18 | |||
4224 | sourcerer | 19 | |
20 | //#include "errors.h" |
||
3584 | sourcerer | 21 | #include "parser.h" |
22 | #include "utils.h" |
||
23 | |||
24 | #include "core/document.h" |
||
4224 | sourcerer | 25 | |
26 | |||
3584 | sourcerer | 27 | #include "core/string.h" |
28 | #include "core/node.h" |
||
29 | |||
30 | #include "html/html_document.h" |
||
31 | #include "html/html_button_element.h" |
||
32 | #include "html/html_input_element.h" |
||
33 | #include "html/html_select_element.h" |
||
34 | #include "html/html_text_area_element.h" |
||
35 | |||
36 | #include |
||
37 | |||
38 | /** |
||
39 | * libdom Hubbub parser context |
||
40 | */ |
||
41 | struct dom_hubbub_parser { |
||
42 | hubbub_parser *parser; /**< Hubbub parser instance */ |
||
43 | hubbub_tree_handler tree_handler; |
||
44 | /**< Hubbub parser tree handler */ |
||
45 | |||
46 | struct dom_document *doc; /**< DOM Document we're building */ |
||
47 | |||
48 | dom_hubbub_encoding_source encoding_source; |
||
49 | /**< The document's encoding source */ |
||
50 | const char *encoding; /**< The document's encoding */ |
||
51 | |||
52 | bool complete; /**< Indicate stream completion */ |
||
53 | |||
54 | dom_msg msg; /**< Informational messaging function */ |
||
55 | |||
56 | dom_script script; /**< Script callback function */ |
||
57 | |||
58 | void *mctx; /**< Pointer to client data */ |
||
59 | }; |
||
60 | |||
61 | /* Forward declaration to break reference loop */ |
||
62 | static hubbub_error add_attributes(void *parser, void *node, const hubbub_attribute *attributes, uint32_t n_attributes); |
||
63 | |||
64 | |||
65 | |||
66 | |||
67 | |||
68 | /*--------------------- The callbacks definitions --------------------*/ |
||
69 | static hubbub_error create_comment(void *parser, const hubbub_string *data, |
||
70 | void **result) |
||
71 | { |
||
72 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
73 | dom_exception err; |
||
74 | dom_string *str; |
||
75 | struct dom_comment *comment; |
||
76 | |||
77 | *result = NULL; |
||
78 | |||
79 | err = dom_string_create(data->ptr, data->len, &str); |
||
80 | if (err != DOM_NO_ERR) { |
||
81 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
82 | "Can't create comment node text"); |
||
83 | return HUBBUB_UNKNOWN; |
||
84 | } |
||
85 | |||
86 | err = dom_document_create_comment(dom_parser->doc, str, &comment); |
||
87 | if (err != DOM_NO_ERR) { |
||
88 | dom_string_unref(str); |
||
89 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
90 | "Can't create comment node with text '%.*s'", |
||
91 | data->len, data->ptr); |
||
92 | return HUBBUB_UNKNOWN; |
||
93 | } |
||
94 | |||
95 | *result = comment; |
||
96 | |||
97 | dom_string_unref(str); |
||
98 | |||
99 | return HUBBUB_OK; |
||
100 | } |
||
101 | |||
/**
 * Copy at most n bytes of a string into a newly allocated buffer.
 *
 * Copying stops early at the first NUL byte. The result is always
 * NUL-terminated and must be released with free().
 *
 * \param s  Source bytes
 * \param n  Maximum number of bytes to copy
 * \return The copy, or NULL if allocation failed
 */
static char *parser_strndup(const char *s, size_t n)
{
	size_t count = 0;
	char *copy;

	/* Measure: stop at n bytes or the first NUL, whichever is first */
	while (count < n && s[count] != '\0')
		count++;

	copy = malloc(count + 1);
	if (copy != NULL) {
		memcpy(copy, s, count);
		copy[count] = '\0';
	}

	return copy;
}
||
118 | |||
119 | static hubbub_error create_doctype(void *parser, const hubbub_doctype *doctype, |
||
120 | void **result) |
||
121 | { |
||
122 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
123 | dom_exception err; |
||
124 | char *qname, *public_id = NULL, *system_id = NULL; |
||
125 | struct dom_document_type *dtype; |
||
126 | |||
127 | *result = NULL; |
||
128 | |||
129 | qname = parser_strndup((const char *) doctype->name.ptr, |
||
130 | (size_t) doctype->name.len); |
||
131 | if (qname == NULL) { |
||
132 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
133 | "Can't create doctype name"); |
||
134 | goto fail; |
||
135 | } |
||
136 | |||
137 | if (doctype->public_missing == false) { |
||
138 | public_id = parser_strndup( |
||
139 | (const char *) doctype->public_id.ptr, |
||
140 | (size_t) doctype->public_id.len); |
||
141 | } else { |
||
142 | public_id = strdup(""); |
||
143 | } |
||
144 | if (public_id == NULL) { |
||
145 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
146 | "Can't create doctype public id"); |
||
147 | goto clean1; |
||
148 | } |
||
149 | |||
150 | if (doctype->system_missing == false) { |
||
151 | system_id = parser_strndup( |
||
152 | (const char *) doctype->system_id.ptr, |
||
153 | (size_t) doctype->system_id.len); |
||
154 | } else { |
||
155 | system_id = strdup(""); |
||
156 | } |
||
157 | if (system_id == NULL) { |
||
158 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
159 | "Can't create doctype system id"); |
||
160 | goto clean2; |
||
161 | } |
||
162 | |||
163 | err = dom_implementation_create_document_type(qname, |
||
164 | public_id, system_id, &dtype); |
||
165 | if (err != DOM_NO_ERR) { |
||
166 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
167 | "Can't create the document type"); |
||
168 | goto clean3; |
||
169 | } |
||
170 | |||
171 | *result = dtype; |
||
172 | |||
173 | clean3: |
||
174 | free(system_id); |
||
175 | |||
176 | clean2: |
||
177 | free(public_id); |
||
178 | |||
179 | clean1: |
||
180 | free(qname); |
||
181 | |||
182 | fail: |
||
183 | if (*result == NULL) |
||
184 | return HUBBUB_UNKNOWN; |
||
185 | else |
||
186 | return HUBBUB_OK; |
||
187 | } |
||
188 | |||
189 | static hubbub_error create_element(void *parser, const hubbub_tag *tag, |
||
190 | void **result) |
||
191 | { |
||
192 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
193 | dom_exception err; |
||
194 | dom_string *name; |
||
195 | struct dom_element *element = NULL; |
||
196 | hubbub_error herr; |
||
197 | |||
198 | *result = NULL; |
||
199 | |||
200 | err = dom_string_create_interned(tag->name.ptr, tag->name.len, &name); |
||
201 | if (err != DOM_NO_ERR) { |
||
202 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
203 | "Can't create element name"); |
||
204 | goto fail; |
||
205 | } |
||
206 | |||
207 | if (tag->ns == HUBBUB_NS_NULL) { |
||
208 | err = dom_document_create_element(dom_parser->doc, name, |
||
209 | &element); |
||
210 | if (err != DOM_NO_ERR) { |
||
211 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
212 | "Can't create the DOM element"); |
||
213 | goto clean1; |
||
214 | } |
||
215 | } else { |
||
216 | err = dom_document_create_element_ns(dom_parser->doc, |
||
217 | dom_namespaces[tag->ns], name, &element); |
||
218 | if (err != DOM_NO_ERR) { |
||
219 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
220 | "Can't create the DOM element"); |
||
221 | goto clean1; |
||
222 | } |
||
223 | } |
||
224 | |||
225 | if (element != NULL && tag->n_attributes > 0) { |
||
226 | herr = add_attributes(parser, element, tag->attributes, |
||
227 | tag->n_attributes); |
||
228 | if (herr != HUBBUB_OK) |
||
229 | goto clean1; |
||
230 | } |
||
231 | |||
232 | *result = element; |
||
233 | |||
234 | clean1: |
||
235 | dom_string_unref(name); |
||
236 | |||
237 | fail: |
||
238 | if (*result == NULL) |
||
239 | return HUBBUB_UNKNOWN; |
||
240 | else |
||
241 | return HUBBUB_OK; |
||
242 | } |
||
243 | |||
244 | static hubbub_error create_text(void *parser, const hubbub_string *data, |
||
245 | void **result) |
||
246 | { |
||
247 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
248 | dom_exception err; |
||
249 | dom_string *str; |
||
250 | struct dom_text *text = NULL; |
||
251 | |||
252 | *result = NULL; |
||
253 | |||
254 | err = dom_string_create(data->ptr, data->len, &str); |
||
255 | if (err != DOM_NO_ERR) { |
||
256 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
257 | "Can't create text '%.*s'", data->len, |
||
258 | data->ptr); |
||
259 | goto fail; |
||
260 | } |
||
261 | |||
262 | err = dom_document_create_text_node(dom_parser->doc, str, &text); |
||
263 | if (err != DOM_NO_ERR) { |
||
264 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
265 | "Can't create the DOM text node"); |
||
266 | goto clean1; |
||
267 | } |
||
268 | |||
269 | *result = text; |
||
270 | clean1: |
||
271 | dom_string_unref(str); |
||
272 | |||
273 | fail: |
||
274 | if (*result == NULL) |
||
275 | return HUBBUB_UNKNOWN; |
||
276 | else |
||
277 | return HUBBUB_OK; |
||
278 | |||
279 | } |
||
280 | |||
281 | static hubbub_error ref_node(void *parser, void *node) |
||
282 | { |
||
283 | struct dom_node *dnode = (struct dom_node *) node; |
||
284 | |||
285 | UNUSED(parser); |
||
286 | |||
287 | dom_node_ref(dnode); |
||
288 | |||
289 | return HUBBUB_OK; |
||
290 | } |
||
291 | |||
292 | static hubbub_error unref_node(void *parser, void *node) |
||
293 | { |
||
294 | struct dom_node *dnode = (struct dom_node *) node; |
||
295 | |||
296 | UNUSED(parser); |
||
297 | |||
298 | dom_node_unref(dnode); |
||
299 | |||
300 | return HUBBUB_OK; |
||
301 | } |
||
302 | |||
303 | static hubbub_error append_child(void *parser, void *parent, void *child, |
||
304 | void **result) |
||
305 | { |
||
306 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
307 | dom_exception err; |
||
308 | |||
309 | err = dom_node_append_child((struct dom_node *) parent, |
||
310 | (struct dom_node *) child, |
||
311 | (struct dom_node **) result); |
||
312 | if (err != DOM_NO_ERR) { |
||
313 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
314 | "Can't append child '%p' for parent '%p'", |
||
315 | child, parent); |
||
316 | return HUBBUB_UNKNOWN; |
||
317 | } |
||
318 | |||
319 | return HUBBUB_OK; |
||
320 | } |
||
321 | |||
322 | static hubbub_error insert_before(void *parser, void *parent, void *child, |
||
323 | void *ref_child, void **result) |
||
324 | { |
||
325 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
326 | dom_exception err; |
||
327 | |||
328 | err = dom_node_insert_before((struct dom_node *) parent, |
||
329 | (struct dom_node *) child, |
||
330 | (struct dom_node *) ref_child, |
||
331 | (struct dom_node **) result); |
||
332 | if (err != DOM_NO_ERR) { |
||
333 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
334 | "Can't insert node '%p' before node '%p'", |
||
335 | child, ref_child); |
||
336 | return HUBBUB_UNKNOWN; |
||
337 | } |
||
338 | |||
339 | return HUBBUB_OK; |
||
340 | } |
||
341 | |||
342 | static hubbub_error remove_child(void *parser, void *parent, void *child, |
||
343 | void **result) |
||
344 | { |
||
345 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
346 | dom_exception err; |
||
347 | |||
348 | err = dom_node_remove_child((struct dom_node *) parent, |
||
349 | (struct dom_node *) child, |
||
350 | (struct dom_node **) result); |
||
351 | if (err != DOM_NO_ERR) { |
||
352 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
353 | "Can't remove child '%p'", child); |
||
354 | return HUBBUB_UNKNOWN; |
||
355 | } |
||
356 | |||
357 | return HUBBUB_OK; |
||
358 | } |
||
359 | |||
360 | static hubbub_error clone_node(void *parser, void *node, bool deep, |
||
361 | void **result) |
||
362 | { |
||
363 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
364 | dom_exception err; |
||
365 | |||
366 | err = dom_node_clone_node((struct dom_node *) node, deep, |
||
367 | (struct dom_node **) result); |
||
368 | if (err != DOM_NO_ERR) { |
||
369 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
370 | "Can't clone node '%p'", node); |
||
371 | return HUBBUB_UNKNOWN; |
||
372 | } |
||
373 | |||
374 | return HUBBUB_OK; |
||
375 | } |
||
376 | |||
377 | static hubbub_error reparent_children(void *parser, void *node, |
||
378 | void *new_parent) |
||
379 | { |
||
380 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
381 | dom_exception err; |
||
382 | struct dom_node *child, *result; |
||
383 | |||
384 | while(true) { |
||
385 | err = dom_node_get_first_child((struct dom_node *) node, |
||
386 | &child); |
||
387 | if (err != DOM_NO_ERR) { |
||
388 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
389 | "Error in dom_note_get_first_child"); |
||
390 | return HUBBUB_UNKNOWN; |
||
391 | } |
||
392 | if (child == NULL) |
||
393 | break; |
||
394 | |||
395 | err = dom_node_remove_child(node, (struct dom_node *) child, |
||
396 | &result); |
||
397 | if (err != DOM_NO_ERR) { |
||
398 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
399 | "Error in dom_node_remove_child"); |
||
400 | goto fail; |
||
401 | } |
||
402 | dom_node_unref(result); |
||
403 | |||
404 | err = dom_node_append_child((struct dom_node *) new_parent, |
||
405 | (struct dom_node *) child, &result); |
||
406 | if (err != DOM_NO_ERR) { |
||
407 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
408 | "Error in dom_node_append_child"); |
||
409 | goto fail; |
||
410 | } |
||
411 | dom_node_unref(result); |
||
412 | dom_node_unref(child); |
||
413 | } |
||
414 | return HUBBUB_OK; |
||
415 | |||
416 | fail: |
||
417 | dom_node_unref(child); |
||
418 | return HUBBUB_UNKNOWN; |
||
419 | } |
||
420 | |||
421 | static hubbub_error get_parent(void *parser, void *node, bool element_only, |
||
422 | void **result) |
||
423 | { |
||
424 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
425 | dom_exception err; |
||
426 | struct dom_node *parent; |
||
427 | dom_node_type type = DOM_NODE_TYPE_COUNT; |
||
428 | |||
429 | err = dom_node_get_parent_node((struct dom_node *) node, |
||
430 | &parent); |
||
431 | if (err != DOM_NO_ERR) { |
||
432 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
433 | "Error in dom_node_get_parent"); |
||
434 | return HUBBUB_UNKNOWN; |
||
435 | } |
||
436 | if (element_only == false) { |
||
437 | *result = parent; |
||
438 | return HUBBUB_OK; |
||
439 | } |
||
440 | |||
441 | err = dom_node_get_node_type(parent, &type); |
||
442 | if (err != DOM_NO_ERR) { |
||
443 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
444 | "Error in dom_node_get_type"); |
||
445 | goto fail; |
||
446 | } |
||
447 | if (type == DOM_ELEMENT_NODE) { |
||
448 | *result = parent; |
||
449 | return HUBBUB_OK; |
||
450 | } else { |
||
451 | *result = NULL; |
||
452 | dom_node_unref(parent); |
||
453 | return HUBBUB_OK; |
||
454 | } |
||
455 | |||
456 | return HUBBUB_OK; |
||
457 | fail: |
||
458 | dom_node_unref(parent); |
||
459 | return HUBBUB_UNKNOWN; |
||
460 | } |
||
461 | |||
462 | static hubbub_error has_children(void *parser, void *node, bool *result) |
||
463 | { |
||
464 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
465 | dom_exception err; |
||
466 | |||
467 | err = dom_node_has_child_nodes((struct dom_node *) node, result); |
||
468 | if (err != DOM_NO_ERR) { |
||
469 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
470 | "Error in dom_node_has_child_nodes"); |
||
471 | return HUBBUB_UNKNOWN; |
||
472 | } |
||
473 | return HUBBUB_OK; |
||
474 | } |
||
475 | |||
476 | static hubbub_error form_associate(void *parser, void *form, void *node) |
||
477 | { |
||
4224 | sourcerer | 478 | |
3584 | sourcerer | 479 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
480 | dom_html_form_element *form_ele = form; |
||
481 | dom_node_internal *ele = node; |
||
482 | dom_html_document *doc = (dom_html_document *)ele->owner; |
||
4224 | sourcerer | 483 | dom_exception err = DOM_NO_ERR; |
3584 | sourcerer | 484 | |
485 | /* Determine the kind of the node we have here. */ |
||
4224 | sourcerer | 486 | if (dom_string_caseless_isequal(ele->name, |
3584 | sourcerer | 487 | doc->memoised[hds_BUTTON])) { |
488 | err = _dom_html_button_element_set_form( |
||
489 | (dom_html_button_element *)node, form_ele); |
||
490 | if (err != DOM_NO_ERR) { |
||
491 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
492 | "Error in form_associate"); |
||
493 | return HUBBUB_UNKNOWN; |
||
494 | } |
||
495 | } else if (dom_string_caseless_isequal(ele->name, |
||
496 | doc->memoised[hds_INPUT])) { |
||
497 | err = _dom_html_input_element_set_form( |
||
498 | (dom_html_input_element *)node, form_ele); |
||
499 | if (err != DOM_NO_ERR) { |
||
500 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
501 | "Error in form_associate"); |
||
502 | return HUBBUB_UNKNOWN; |
||
503 | } |
||
504 | } else if (dom_string_caseless_isequal(ele->name, |
||
505 | doc->memoised[hds_SELECT])) { |
||
506 | err = _dom_html_select_element_set_form( |
||
507 | (dom_html_select_element *)node, form_ele); |
||
508 | if (err != DOM_NO_ERR) { |
||
509 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
510 | "Error in form_associate"); |
||
511 | return HUBBUB_UNKNOWN; |
||
512 | } |
||
513 | } else if (dom_string_caseless_isequal(ele->name, |
||
514 | doc->memoised[hds_TEXTAREA])) { |
||
515 | err = _dom_html_text_area_element_set_form( |
||
516 | (dom_html_text_area_element *)node, form_ele); |
||
517 | if (err != DOM_NO_ERR) { |
||
518 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
519 | "Error in form_associate"); |
||
520 | return HUBBUB_UNKNOWN; |
||
521 | } |
||
4224 | sourcerer | 522 | } |
3584 | sourcerer | 523 | |
524 | return HUBBUB_OK; |
||
525 | } |
||
526 | |||
527 | static hubbub_error add_attributes(void *parser, void *node, |
||
528 | const hubbub_attribute *attributes, uint32_t n_attributes) |
||
529 | { |
||
530 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
531 | dom_exception err; |
||
532 | uint32_t i; |
||
533 | |||
534 | for (i = 0; i < n_attributes; i++) { |
||
535 | dom_string *name, *value; |
||
536 | |||
537 | err = dom_string_create_interned(attributes[i].name.ptr, |
||
538 | attributes[i].name.len, &name); |
||
539 | if (err != DOM_NO_ERR) { |
||
540 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
541 | "Can't create attribute name"); |
||
542 | goto fail; |
||
543 | } |
||
544 | |||
545 | err = dom_string_create(attributes[i].value.ptr, |
||
546 | attributes[i].value.len, &value); |
||
547 | if (err != DOM_NO_ERR) { |
||
548 | dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx, |
||
549 | "Can't create attribute value"); |
||
550 | dom_string_unref(name); |
||
551 | goto fail; |
||
552 | } |
||
553 | |||
554 | if (attributes[i].ns == HUBBUB_NS_NULL) { |
||
555 | err = dom_element_set_attribute( |
||
556 | (struct dom_element *) node, name, |
||
557 | value); |
||
558 | dom_string_unref(name); |
||
559 | dom_string_unref(value); |
||
560 | if (err != DOM_NO_ERR) { |
||
561 | dom_parser->msg(DOM_MSG_CRITICAL, |
||
562 | dom_parser->mctx, |
||
563 | "Can't add attribute"); |
||
564 | } |
||
565 | } else { |
||
566 | err = dom_element_set_attribute_ns( |
||
567 | (struct dom_element *) node, |
||
568 | dom_namespaces[attributes[i].ns], name, |
||
569 | value); |
||
570 | dom_string_unref(name); |
||
571 | dom_string_unref(value); |
||
572 | if (err != DOM_NO_ERR) { |
||
573 | dom_parser->msg(DOM_MSG_CRITICAL, |
||
574 | dom_parser->mctx, |
||
575 | "Can't add attribute ns"); |
||
576 | } |
||
577 | } |
||
578 | } |
||
579 | |||
580 | return HUBBUB_OK; |
||
581 | |||
582 | fail: |
||
583 | return HUBBUB_UNKNOWN; |
||
584 | } |
||
585 | |||
586 | static hubbub_error set_quirks_mode(void *parser, hubbub_quirks_mode mode) |
||
587 | { |
||
588 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
589 | |||
590 | switch (mode) { |
||
591 | case HUBBUB_QUIRKS_MODE_NONE: |
||
592 | dom_document_set_quirks_mode(dom_parser->doc, |
||
593 | DOM_DOCUMENT_QUIRKS_MODE_NONE); |
||
594 | break; |
||
595 | case HUBBUB_QUIRKS_MODE_LIMITED: |
||
596 | dom_document_set_quirks_mode(dom_parser->doc, |
||
597 | DOM_DOCUMENT_QUIRKS_MODE_LIMITED); |
||
598 | break; |
||
599 | case HUBBUB_QUIRKS_MODE_FULL: |
||
600 | dom_document_set_quirks_mode(dom_parser->doc, |
||
601 | DOM_DOCUMENT_QUIRKS_MODE_FULL); |
||
602 | break; |
||
603 | } |
||
604 | |||
605 | return HUBBUB_OK; |
||
606 | } |
||
607 | |||
608 | static hubbub_error change_encoding(void *parser, const char *charset) |
||
609 | { |
||
610 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
611 | uint32_t source; |
||
612 | const char *name; |
||
613 | |||
614 | /* If we have an encoding here, it means we are *certain* */ |
||
615 | if (dom_parser->encoding != NULL) { |
||
616 | return HUBBUB_OK; |
||
617 | } |
||
618 | |||
619 | /* Find the confidence otherwise (can only be from a BOM) */ |
||
620 | name = hubbub_parser_read_charset(dom_parser->parser, &source); |
||
621 | |||
622 | if (source == HUBBUB_CHARSET_CONFIDENT) { |
||
623 | dom_parser->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_DETECTED; |
||
624 | dom_parser->encoding = charset; |
||
625 | return HUBBUB_OK; |
||
626 | } |
||
627 | |||
628 | /* So here we have something of confidence tentative... */ |
||
629 | /* http://www.whatwg.org/specs/web-apps/current-work/#change */ |
||
630 | |||
631 | /* 2. "If the new encoding is identical or equivalent to the encoding |
||
632 | * that is already being used to interpret the input stream, then set |
||
633 | * the confidence to confident and abort these steps." */ |
||
634 | |||
635 | /* Whatever happens, the encoding should be set here; either for |
||
636 | * reprocessing with a different charset, or for confirming that the |
||
637 | * charset is in fact correct */ |
||
638 | dom_parser->encoding = charset; |
||
639 | dom_parser->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_META; |
||
640 | |||
641 | /* Equal encodings will have the same string pointers */ |
||
642 | return (charset == name) ? HUBBUB_OK : HUBBUB_ENCODINGCHANGE; |
||
643 | } |
||
644 | |||
645 | static hubbub_error complete_script(void *parser, void *script) |
||
646 | { |
||
647 | dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; |
||
648 | dom_hubbub_error err; |
||
649 | |||
650 | err = dom_parser->script(dom_parser->mctx, (struct dom_node *)script); |
||
651 | |||
652 | if (err == DOM_HUBBUB_OK) { |
||
653 | return HUBBUB_OK; |
||
654 | } |
||
655 | |||
656 | if ((err & DOM_HUBBUB_HUBBUB_ERR) != 0) { |
||
657 | return err & (~DOM_HUBBUB_HUBBUB_ERR); |
||
658 | } |
||
659 | |||
660 | return HUBBUB_UNKNOWN; |
||
661 | } |
||
662 | |||
663 | static hubbub_tree_handler tree_handler = { |
||
664 | create_comment, |
||
665 | create_doctype, |
||
666 | create_element, |
||
667 | create_text, |
||
668 | ref_node, |
||
669 | unref_node, |
||
670 | append_child, |
||
671 | insert_before, |
||
672 | remove_child, |
||
673 | clone_node, |
||
674 | reparent_children, |
||
675 | get_parent, |
||
676 | has_children, |
||
677 | form_associate, |
||
678 | add_attributes, |
||
679 | set_quirks_mode, |
||
680 | change_encoding, |
||
681 | complete_script, |
||
682 | NULL |
||
683 | }; |
||
684 | |||
/**
 * Memory allocator handed to Hubbub.
 *
 * \param ptr  Existing block to resize or release, or NULL to allocate
 * \param len  Requested size in bytes; 0 releases \a ptr
 * \param pw   Unused
 * \return The (re)allocated block, or NULL after a release, a zero-size
 *         request, or an allocation failure
 */
static void *dom_hubbub_alloc(void *ptr, size_t len, void *pw)
{
	(void) pw;

	if (len == 0) {
		/* free(NULL) is a no-op, so this also covers the
		 * "nothing to allocate" case. */
		free(ptr);
		return NULL;
	}

	if (ptr == NULL)
		return malloc(len);

	return realloc(ptr, len);
}
||
702 | |||
/**
 * Default message callback: discards every message.
 *
 * \param severity  Ignored
 * \param ctx       Ignored
 * \param msg       Ignored, as are any further arguments
 */
static void dom_hubbub_parser_default_msg(uint32_t severity, void *ctx,
		const char *msg, ...)
{
	(void) severity;
	(void) ctx;
	(void) msg;
}
||
713 | |||
714 | /** |
||
715 | * Default script callback. |
||
716 | */ |
||
717 | static dom_hubbub_error |
||
718 | dom_hubbub_parser_default_script(void *ctx, struct dom_node *node) |
||
719 | { |
||
720 | UNUSED(ctx); |
||
721 | UNUSED(node); |
||
722 | return DOM_HUBBUB_OK; |
||
723 | } |
||
724 | |||
725 | /** |
||
726 | * Create a Hubbub parser instance |
||
727 | * |
||
728 | * \param params The binding creation parameters |
||
729 | * \param parser Pointer to location to recive instance. |
||
730 | * \param document Pointer to location to receive document. |
||
731 | * \return Error code |
||
732 | */ |
||
733 | dom_hubbub_error |
||
734 | dom_hubbub_parser_create(dom_hubbub_parser_params *params, |
||
735 | dom_hubbub_parser **parser, |
||
736 | dom_document **document) |
||
737 | { |
||
738 | dom_hubbub_parser *binding; |
||
739 | hubbub_parser_optparams optparams; |
||
740 | hubbub_error error; |
||
741 | dom_exception err; |
||
742 | dom_string *idname = NULL; |
||
743 | |||
744 | /* check result parameters */ |
||
745 | if (document == NULL) { |
||
746 | return DOM_HUBBUB_BADPARM; |
||
747 | } |
||
748 | |||
749 | if (parser == NULL) { |
||
750 | return DOM_HUBBUB_BADPARM; |
||
751 | } |
||
752 | |||
753 | /* setup binding parser context */ |
||
754 | binding = malloc(sizeof(dom_hubbub_parser)); |
||
755 | if (binding == NULL) { |
||
756 | return DOM_HUBBUB_NOMEM; |
||
757 | } |
||
758 | |||
759 | binding->parser = NULL; |
||
760 | binding->doc = NULL; |
||
761 | binding->encoding = params->enc; |
||
762 | |||
763 | if (params->enc != NULL) { |
||
764 | binding->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_HEADER; |
||
765 | } else { |
||
766 | binding->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_DETECTED; |
||
767 | } |
||
768 | |||
769 | binding->complete = false; |
||
770 | |||
771 | if (params->msg == NULL) { |
||
772 | binding->msg = dom_hubbub_parser_default_msg; |
||
773 | } else { |
||
774 | binding->msg = params->msg; |
||
775 | } |
||
776 | binding->mctx = params->ctx; |
||
777 | |||
778 | /* ensure script function is valid or use the default */ |
||
779 | if (params->script == NULL) { |
||
780 | binding->script = dom_hubbub_parser_default_script; |
||
781 | } else { |
||
782 | binding->script = params->script; |
||
783 | } |
||
784 | |||
785 | /* create hubbub parser */ |
||
786 | error = hubbub_parser_create(binding->encoding, |
||
787 | params->fix_enc, |
||
788 | dom_hubbub_alloc, |
||
789 | NULL, |
||
790 | &binding->parser); |
||
791 | if (error != HUBBUB_OK) { |
||
792 | free(binding); |
||
793 | return (DOM_HUBBUB_HUBBUB_ERR | error); |
||
794 | } |
||
795 | |||
796 | /* create DOM document */ |
||
797 | err = dom_implementation_create_document(DOM_IMPLEMENTATION_HTML, |
||
798 | NULL, |
||
799 | NULL, |
||
800 | NULL, |
||
801 | params->daf, |
||
802 | params->ctx, |
||
803 | &binding->doc); |
||
804 | if (err != DOM_NO_ERR) { |
||
805 | hubbub_parser_destroy(binding->parser); |
||
806 | free(binding); |
||
807 | return DOM_HUBBUB_DOM; |
||
808 | } |
||
809 | |||
810 | binding->tree_handler = tree_handler; |
||
811 | binding->tree_handler.ctx = (void *)binding; |
||
812 | |||
813 | /* set tree handler on parser */ |
||
814 | optparams.tree_handler = &binding->tree_handler; |
||
815 | hubbub_parser_setopt(binding->parser, |
||
816 | HUBBUB_PARSER_TREE_HANDLER, |
||
817 | &optparams); |
||
818 | |||
819 | /* set document node*/ |
||
820 | optparams.document_node = dom_node_ref((struct dom_node *)binding->doc); |
||
821 | hubbub_parser_setopt(binding->parser, |
||
822 | HUBBUB_PARSER_DOCUMENT_NODE, |
||
823 | &optparams); |
||
824 | |||
825 | /* set scripting state */ |
||
826 | optparams.enable_scripting = params->enable_script; |
||
827 | hubbub_parser_setopt(binding->parser, |
||
828 | HUBBUB_PARSER_ENABLE_SCRIPTING, |
||
829 | &optparams); |
||
830 | |||
831 | /* set the document id parameter before the parse so searches |
||
832 | * based on id succeed. |
||
833 | */ |
||
834 | err = dom_string_create_interned((const uint8_t *) "id", |
||
835 | SLEN("id"), |
||
836 | &idname); |
||
837 | if (err != DOM_NO_ERR) { |
||
838 | binding->msg(DOM_MSG_ERROR, binding->mctx, "Can't set DOM document id name"); |
||
839 | hubbub_parser_destroy(binding->parser); |
||
840 | free(binding); |
||
841 | return DOM_HUBBUB_DOM; |
||
842 | } |
||
843 | _dom_document_set_id_name(binding->doc, idname); |
||
844 | dom_string_unref(idname); |
||
845 | |||
846 | /* set return parameters */ |
||
847 | *document = (dom_document *)dom_node_ref(binding->doc); |
||
848 | *parser = binding; |
||
849 | |||
850 | return DOM_HUBBUB_OK; |
||
851 | } |
||
852 | |||
853 | |||
854 | dom_hubbub_error |
||
855 | dom_hubbub_parser_insert_chunk(dom_hubbub_parser *parser, |
||
856 | const uint8_t *data, |
||
857 | size_t length) |
||
858 | { |
||
859 | hubbub_parser_insert_chunk(parser->parser, data, length); |
||
860 | |||
861 | return DOM_HUBBUB_OK; |
||
862 | } |
||
863 | |||
864 | |||
865 | /** |
||
866 | * Destroy a Hubbub parser instance |
||
867 | * |
||
868 | * \param parser The Hubbub parser object |
||
869 | */ |
||
870 | void dom_hubbub_parser_destroy(dom_hubbub_parser *parser) |
||
871 | { |
||
872 | hubbub_parser_destroy(parser->parser); |
||
873 | parser->parser = NULL; |
||
874 | |||
875 | if (parser->doc != NULL) { |
||
876 | dom_node_unref((struct dom_node *) parser->doc); |
||
877 | parser->doc = NULL; |
||
878 | } |
||
879 | |||
880 | free(parser); |
||
881 | } |
||
882 | |||
883 | /** |
||
884 | * Parse data with Hubbub parser |
||
885 | * |
||
886 | * \param parser The parser object |
||
887 | * \param data The data to be parsed |
||
888 | * \param len The length of the data to be parsed |
||
889 | * \return DOM_HUBBUB_OK on success, |
||
890 | * DOM_HUBBUB_HUBBUB_ERR | |
||
891 | */ |
||
892 | dom_hubbub_error dom_hubbub_parser_parse_chunk(dom_hubbub_parser *parser, |
||
893 | const uint8_t *data, size_t len) |
||
894 | { |
||
895 | hubbub_error err; |
||
896 | |||
897 | err = hubbub_parser_parse_chunk(parser->parser, data, len); |
||
898 | if (err != HUBBUB_OK) |
||
899 | return DOM_HUBBUB_HUBBUB_ERR | err; |
||
900 | |||
901 | return DOM_HUBBUB_OK; |
||
902 | } |
||
903 | |||
904 | /** |
||
905 | * Notify the parser to complete parsing |
||
906 | * |
||
907 | * \param parser The parser object |
||
908 | * \return DOM_HUBBUB_OK on success, |
||
909 | * DOM_HUBBUB_HUBBUB_ERR | |
||
910 | * DOMHUBBUB_UNKNOWN | |
||
911 | */ |
||
912 | dom_hubbub_error dom_hubbub_parser_completed(dom_hubbub_parser *parser) |
||
913 | { |
||
914 | hubbub_error err; |
||
915 | |||
916 | err = hubbub_parser_completed(parser->parser); |
||
917 | if (err != HUBBUB_OK) { |
||
918 | parser->msg(DOM_MSG_ERROR, parser->mctx, |
||
919 | "hubbub_parser_completed failed: %d", err); |
||
920 | return DOM_HUBBUB_HUBBUB_ERR | err; |
||
921 | } |
||
922 | |||
923 | parser->complete = true; |
||
924 | |||
925 | return DOM_HUBBUB_OK; |
||
926 | } |
||
927 | |||
928 | /** |
||
929 | * Retrieve the encoding |
||
930 | * |
||
931 | * \param parser The parser object |
||
932 | * \param source The encoding_source |
||
933 | * \return the encoding name |
||
934 | */ |
||
935 | const char *dom_hubbub_parser_get_encoding(dom_hubbub_parser *parser, |
||
936 | dom_hubbub_encoding_source *source) |
||
937 | { |
||
938 | *source = parser->encoding_source; |
||
939 | |||
940 | return parser->encoding != NULL ? parser->encoding |
||
941 | : "Windows-1252"; |
||
942 | } |
||
943 | |||
944 | /** |
||
945 | * Set the Parse pause state. |
||
946 | * |
||
947 | * \param parser The parser object |
||
948 | * \param pause The pause state to set. |
||
949 | * \return DOM_HUBBUB_OK on success, |
||
950 | * DOM_HUBBUB_HUBBUB_ERR | |
||
951 | */ |
||
952 | dom_hubbub_error dom_hubbub_parser_pause(dom_hubbub_parser *parser, bool pause) |
||
953 | { |
||
954 | hubbub_error err; |
||
955 | hubbub_parser_optparams params; |
||
956 | |||
957 | params.pause_parse = pause; |
||
958 | err = hubbub_parser_setopt(parser->parser, HUBBUB_PARSER_PAUSE, ¶ms); |
||
959 | if (err != HUBBUB_OK) |
||
960 | return DOM_HUBBUB_HUBBUB_ERR | err; |
||
961 | |||
962 | return DOM_HUBBUB_OK; |
||
963 | }>>>>>>>>>> |