Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * This file is part of Hubbub.
  3.  * Licensed under the MIT License,
  4.  *                http://www.opensource.org/licenses/mit-license.php
  5.  *
  6.  * Copyright 2008 Andrew Sidwell <takkaria@netsurf-browser.org>
  7.  * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
  8.  */
  9.  
#define _GNU_SOURCE /* for strndup */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>

#include <hubbub/parser.h>
#include <hubbub/tree.h>
  20.  
  21. #define UNUSED(x) ((x)=(x))
  22.  
  23. /**
  24.  * Error codes
  25.  */
  26. typedef enum error_code {
  27.         OK,
  28.         NOMEM,
  29.         BADENCODING,
  30.         ENCODINGCHANGE
  31. } error_code;
  32.  
  33. /**
  34.  * Source of encoding information
  35.  */
  36. typedef enum encoding_source {
  37.         ENCODING_SOURCE_HEADER,
  38.         ENCODING_SOURCE_DETECTED,
  39.         ENCODING_SOURCE_META
  40. } encoding_source;
  41.  
/**
 * Our context
 *
 * One of these exists per parse attempt. It ties together the hubbub parser,
 * the libxml document being built and the callback table handed to hubbub
 * (whose ctx member points back at this structure).
 */
typedef struct context {
        hubbub_parser *parser;                  /**< Underlying parser */

        htmlDocPtr document;                    /**< Document we're building */

        const char *encoding;                   /**< The charset of the input */
        encoding_source enc_source;             /**< The encoding source */

/* One slot per non-NULL entry of the file-level namespaces table; slot
 * (i - 1) holds the libxml namespace created for table entry i. */
#define NUM_NAMESPACES (6)
        xmlNsPtr namespaces[NUM_NAMESPACES];    /**< XML namespaces */
#undef NUM_NAMESPACES

        hubbub_tree_handler tree_handler;       /**< Hubbub tree callbacks */
} context;
  59.  
  60. /**
  61.  * Mapping of namespace prefixes to URIs, indexed by hubbub_ns.
  62.  */
  63. static struct {
  64.         const char *prefix;
  65.         const char *url;
  66. } namespaces[] = {
  67.         { NULL, NULL },
  68.         { NULL, "http://www.w3.org/1999/xhtml" },
  69.         { "math", "http://www.w3.org/1998/Math/MathML" },
  70.         { "svg", "http://www.w3.org/2000/svg" },
  71.         { "xlink", "http://www.w3.org/1999/xlink" },
  72.         /** \todo Oh dear. LibXML2 refuses to create any namespace with a
  73.          * prefix of "xml". That sucks, royally. */
  74.         { "xml", "http://www.w3.org/XML/1998/namespace" },
  75.         { "xmlns", "http://www.w3.org/2000/xmlns/" }
  76. };
  77.  
/* Helpers used by the tree callbacks */
static inline char *c_string_from_hubbub_string(context *ctx,
                const hubbub_string *str);
static void create_namespaces(context *ctx, xmlNode *root);
/* Tree callbacks, declared in the order in which they are listed in the
 * tree_handler initialiser. Each receives our context as its first
 * argument. */
static hubbub_error create_comment(void *ctx, const hubbub_string *data,
                void **result);
static hubbub_error create_doctype(void *ctx, const hubbub_doctype *doctype,
                void **result);
static hubbub_error create_element(void *ctx, const hubbub_tag *tag,
                void **result);
static hubbub_error create_text(void *ctx, const hubbub_string *data,
                void **result);
static hubbub_error ref_node(void *ctx, void *node);
static hubbub_error unref_node(void *ctx, void *node);
static hubbub_error append_child(void *ctx, void *parent, void *child,
                void **result);
static hubbub_error insert_before(void *ctx, void *parent, void *child,
                void *ref_child, void **result);
static hubbub_error remove_child(void *ctx, void *parent, void *child,
                void **result);
static hubbub_error clone_node(void *ctx, void *node, bool deep, void **result);
static hubbub_error reparent_children(void *ctx, void *node, void *new_parent);
static hubbub_error get_parent(void *ctx, void *node, bool element_only,
                void **result);
static hubbub_error has_children(void *ctx, void *node, bool *result);
static hubbub_error form_associate(void *ctx, void *form, void *node);
static hubbub_error add_attributes(void *ctx, void *node,
                const hubbub_attribute *attributes, uint32_t n_attributes);
static hubbub_error set_quirks_mode(void *ctx, hubbub_quirks_mode mode);
static hubbub_error change_encoding(void *ctx, const char *charset);
  107.  
  108. /* Prototype tree handler struct */
  109. static hubbub_tree_handler tree_handler = {
  110.         create_comment,
  111.         create_doctype,
  112.         create_element,
  113.         create_text,
  114.         ref_node,
  115.         unref_node,
  116.         append_child,
  117.         insert_before,
  118.         remove_child,
  119.         clone_node,
  120.         reparent_children,
  121.         get_parent,
  122.         has_children,
  123.         form_associate,
  124.         add_attributes,
  125.         set_quirks_mode,
  126.         change_encoding,
  127.         NULL
  128. };
  129.  
  130. /**
  131.  * Memory allocation callback.
  132.  *
  133.  * \param ptr  Pointer to block to reallocate, or NULL for a new allocation
  134.  * \param len  Required length, in bytes. If zero, then free the block
  135.  * \param pw   Pointer to our private data
  136.  * \return Pointer to resized block
  137.  */
  138. static void *myrealloc(void *ptr, size_t len, void *pw)
  139. {
  140.         /* In this implementation, we just call realloc.
  141.          * If we have more complex allocation requirements (e.g. multiple
  142.          * allocation arenas, then we could use pw to point to the arena to use)
  143.          */
  144.         UNUSED(pw);
  145.  
  146.         return realloc(ptr, len);
  147. }
  148.  
  149. /******************************************************************************
  150.  * Main hubbub driver code                                                    *
  151.  ******************************************************************************/
/* Context lifecycle: create a parser plus an empty document, tear both down */
static error_code create_context(const char *charset, context **ctx);
static void destroy_context(context *c);
/* Feeding the parser: pass it input data, then signal the end of input */
static error_code parse_chunk(context *c, const uint8_t *data, size_t len);
static error_code parse_completed(context *c);
  156.  
  157. int main(int argc, char **argv)
  158. {
  159.         error_code error;
  160.         context *c;
  161.         hubbub_parser_optparams params;
  162.         FILE *input;
  163.         uint8_t *buf;
  164.         size_t len;
  165.  
  166.         if (argc != 2) {
  167.                 fprintf(stderr, "Usage: %s <input>\n", argv[0]);
  168.                 return 1;
  169.         }
  170.  
  171.         /* Read input file into memory. If we wanted to, we could read into
  172.          * a fixed-size buffer and pass each chunk to the parser sequentially.
  173.          */
  174.         input = fopen(argv[1], "r");
  175.         if (input == NULL) {
  176.                 fprintf(stderr, "Failed opening %s\n", argv[1]);
  177.                 return 1;
  178.         }
  179.  
  180.         fseek(input, 0, SEEK_END);
  181.         len = ftell(input);
  182.         fseek(input, 0, SEEK_SET);
  183.  
  184.         buf = malloc(len);
  185.         if (buf == NULL) {
  186.                 fclose(input);
  187.                 fprintf(stderr, "No memory for buf\n");
  188.                 return 1;
  189.         }
  190.  
  191.         fread(buf, 1, len, input);
  192.  
  193.         /* Create our parsing context */
  194.         error = create_context(NULL, &c);
  195.         if (error != OK) {
  196.                 free(buf);
  197.                 fclose(input);
  198.                 fprintf(stderr, "Failed creating parsing context\n");
  199.                 return 1;
  200.         }
  201.  
  202.         /* Attempt to parse the document */
  203.         error = parse_chunk(c, buf, len);
  204.         assert(error == OK || error == ENCODINGCHANGE);
  205.         if (error == ENCODINGCHANGE) {
  206.                 /* During parsing, we detected that the charset of the
  207.                  * input data was different from what was auto-detected
  208.                  * (see the change_encoding callback for more details).
  209.                  * Therefore, we must destroy the current parser and create
  210.                  * a new one using the newly-detected charset. Then we
  211.                  * reparse the data using the new parser.
  212.                  *
  213.                  * change_encoding() will have put the new charset into
  214.                  * c->encoding.
  215.                  */
  216.                 context *c2;
  217.  
  218.                 error = create_context(c->encoding, &c2);
  219.                 if (error != OK) {
  220.                         destroy_context(c2);
  221.                         free(buf);
  222.                         fclose(input);
  223.                         fprintf(stderr, "Failed recreating context\n");
  224.                         return 1;
  225.                 }
  226.  
  227.                 destroy_context(c);
  228.  
  229.                 c = c2;
  230.  
  231.                 /* Retry the parse */
  232.                 error = parse_chunk(c, buf, len);
  233.         }
  234.  
  235.         if (error != OK) {
  236.                 destroy_context(c);
  237.                 free(buf);
  238.                 fclose(input);
  239.                 fprintf(stderr, "Failed parsing document\n");
  240.                 return 1;
  241.         }
  242.  
  243.  
  244.         /* Tell hubbub that we've finished */
  245.         error = parse_completed(c);
  246.         if (error != OK) {
  247.                 destroy_context(c);
  248.                 free(buf);
  249.                 fclose(input);
  250.                 fprintf(stderr, "Failed parsing document\n");
  251.                 return 1;
  252.         }
  253.  
  254.         /* We're done with this */
  255.         free(buf);
  256.  
  257.         /* At this point, the DOM tree can be accessed through c->document */
  258.         /* Let's dump it to stdout */
  259.         /* In a real application, we'd probably want to grab the document
  260.          * from the parsing context, then destroy the context as it's no
  261.          * longer of any use */
  262.         xmlDebugDumpDocument(stdout, c->document);
  263.  
  264.         /* Clean up */
  265.         destroy_context(c);
  266.  
  267.         fclose(input);
  268.  
  269.         return 0;
  270. }
  271.  
  272. /**
  273.  * Create a parsing context
  274.  *
  275.  * \param charset  The charset the input data is in, or NULL to autodetect
  276.  * \param ctx      Location to receive context
  277.  * \return OK on success,
  278.  *         NOMEM on memory exhaustion,
  279.  *         BADENCODING if charset isn't supported
  280.  */
  281. error_code create_context(const char *charset, context **ctx)
  282. {
  283.         context *c;
  284.         hubbub_parser_optparams params;
  285.         uint32_t i;
  286.         hubbub_error error;
  287.  
  288.         c = malloc(sizeof(context));
  289.         if (c == NULL)
  290.                 return NOMEM;
  291.  
  292.         c->parser = NULL;
  293.         c->encoding = charset;
  294.         c->enc_source = ENCODING_SOURCE_HEADER;
  295.         c->document = NULL;
  296.  
  297.         /* Create the parser */
  298.         error = hubbub_parser_create(c->encoding, true, myrealloc, NULL,
  299.                         &c->parser);
  300.         if (error != HUBBUB_OK) {
  301.                 free(c);
  302.                 if (error == HUBBUB_BADENCODING)
  303.                         return BADENCODING;
  304.                 else
  305.                         return NOMEM;   /* Assume OOM */
  306.         }
  307.  
  308.         /* Create the root node of the document */
  309.         c->document = htmlNewDocNoDtD(NULL, NULL);
  310.         if (c->document == NULL) {
  311.                 hubbub_parser_destroy(c->parser);
  312.                 free(c);
  313.                 return NOMEM;
  314.         }
  315.         /* Reference count of zero */
  316.         c->document->_private = (void *) 0;
  317.  
  318.         for (i = 0;
  319.                 i < sizeof(c->namespaces) / sizeof(c->namespaces[0]); i++) {
  320.                 c->namespaces[i] = NULL;
  321.         }
  322.  
  323.         /* The following are both needed to make hubbub do anything. If it has
  324.          * no tree handler or document node registered, it won't attempt to
  325.          * build a tree. */
  326.  
  327.         /* Register tree handler with hubbub */
  328.         c->tree_handler = tree_handler;
  329.         c->tree_handler.ctx = (void *) c;
  330.  
  331.         params.tree_handler = &c->tree_handler;
  332.         hubbub_parser_setopt(c->parser, HUBBUB_PARSER_TREE_HANDLER, &params);
  333.  
  334.         /* Also tell it about the document node (referencing it first) */
  335.         ref_node(c, c->document);
  336.         params.document_node = c->document;
  337.         hubbub_parser_setopt(c->parser, HUBBUB_PARSER_DOCUMENT_NODE, &params);
  338.  
  339.         *ctx = c;
  340.  
  341.         return OK;
  342. }
  343.  
  344. /**
  345.  * Destroy a parsing context
  346.  *
  347.  * \param c  Context to destroy
  348.  */
  349. void destroy_context(context *c)
  350. {
  351.         if (c == NULL)
  352.                 return;
  353.  
  354.         if (c->parser != NULL)
  355.                 hubbub_parser_destroy(c->parser);
  356.  
  357.         xmlFreeDoc(c->document);
  358.  
  359.         c->parser = NULL;
  360.         c->encoding = NULL;
  361.         c->document = NULL;
  362.  
  363.         free(c);
  364.  
  365.         return;
  366. }
  367.  
  368. /**
  369.  * Parse a chunk of the input document
  370.  *
  371.  * \param c     Parsing context
  372.  * \param data  Data buffer
  373.  * \param len   Length, in bytes, of data in buffer
  374.  * \return OK on success,
  375.  *         ENCODINGCHANGE if the encoding needs changing
  376.  */
  377. error_code parse_chunk(context *c, const uint8_t *data, size_t len)
  378. {
  379.         hubbub_error err;
  380.  
  381.         err = hubbub_parser_parse_chunk(c->parser, (uint8_t *) data, len);
  382.         if (err == HUBBUB_ENCODINGCHANGE)
  383.                 return ENCODINGCHANGE;
  384.  
  385.         return OK;
  386. }
  387.  
  388. /**
  389.  * Inform that we've run out of input to parse
  390.  *
  391.  * \param c  Parsing context
  392.  * \return OK.
  393.  */
  394. error_code parse_completed(context *c)
  395. {
  396.         hubbub_error error;
  397.  
  398.         error = hubbub_parser_completed(c->parser);
  399.         /** \todo error handling */
  400.  
  401.         return OK;
  402. }
  403.  
  404. /******************************************************************************
  405.  * Helper functions for tree building                                         *
  406.  ******************************************************************************/
  407.  
  408. /**
  409.  * Convert a hubbub string to a C string
  410.  *
  411.  * \param ctx  Our context
  412.  * \param str  The string to convert
  413.  * \return Pointer to C string, must be freed
  414.  *
  415.  * This is a simple utility routine, as libXML expects data to be C strings.
  416.  * If we were implementing our own tree, we might store hubbub-style strings
  417.  * instead (with the associated memory saving)
  418.  */
  419. char *c_string_from_hubbub_string(context *ctx, const hubbub_string *str)
  420. {
  421.         return strndup((const char *) str->ptr, (int) str->len);
  422. }
  423.  
  424. /**
  425.  * Initialise a context's XML namespaces
  426.  *
  427.  * \param ctx   Our context
  428.  * \param root  The root node of the XML tree
  429.  *
  430.  * Again, this is specific to the needs of libXML.
  431.  */
  432. void create_namespaces(context *ctx, xmlNode *root)
  433. {
  434.         uint32_t i;
  435.  
  436.         /* Index 0 is the NULL namespace, so skip over it */
  437.         for (i = 1; i < sizeof(namespaces) / sizeof(namespaces[0]); i++) {
  438.                 ctx->namespaces[i - 1] = xmlNewNs(root,
  439.                                 BAD_CAST namespaces[i].url,
  440.                                 BAD_CAST namespaces[i].prefix);
  441.  
  442.                 /* Expect "xml" to fail here */
  443.                 if (ctx->namespaces[i - 1] == NULL) {
  444.                         fprintf(stderr,
  445.                                 "WARNING: Failed creating namespace %s\n",
  446.                                         namespaces[i].prefix);
  447.                 }
  448.         }
  449. }
  450.  
  451. /******************************************************************************
  452.  * Tree callbacks for hubbub                                                  *
  453.  ******************************************************************************/
  454.  
  455. /**
  456.  * Create a comment node
  457.  *
  458.  * \param ctx     Our context
  459.  * \param data    The comment body
  460.  * \param result  Location to receive manufactured node
  461.  * \return HUBBUB_OK on success, appropriate error otherwise
  462.  *
  463.  * Postcondition: if successful, result's reference count must be 1.
  464.  */
  465. hubbub_error create_comment(void *ctx, const hubbub_string *data, void **result)
  466. {
  467.         context *c = (context *) ctx;
  468.         char *content;
  469.         xmlNodePtr n;
  470.  
  471.         content = c_string_from_hubbub_string(c, data);
  472.         if (content == NULL)
  473.                 return HUBBUB_NOMEM;
  474.  
  475.         n = xmlNewDocComment(c->document, BAD_CAST content);
  476.         if (n == NULL) {
  477.                 free(content);
  478.                 return HUBBUB_NOMEM;
  479.         }
  480.         /* We use the _private field of libXML's xmlNode struct for the
  481.          * reference count. */
  482.         n->_private = (void *) (uintptr_t) 1;
  483.  
  484.         free(content);
  485.  
  486.         *result = (void *) n;
  487.  
  488.         return HUBBUB_OK;
  489. }
  490.  
  491. /**
  492.  * Create a doctype node
  493.  *
  494.  * \param ctx      Our context
  495.  * \param doctype  Data for doctype node (name, public ID and system ID)
  496.  * \param result   Location to receive manufactured node
  497.  * \return HUBBUB_OK on success, appropriate error otherwise
  498.  *
  499.  * Postcondition: if successful, result's reference count must be 1.
  500.  */
  501. hubbub_error create_doctype(void *ctx, const hubbub_doctype *doctype, void **result)
  502. {
  503.         context *c = (context *) ctx;
  504.         char *name, *public = NULL, *system = NULL;
  505.         xmlDtdPtr n;
  506.  
  507.         name = c_string_from_hubbub_string(c, &doctype->name);
  508.         if (name == NULL)
  509.                 return HUBBUB_NOMEM;
  510.  
  511.         /* May not have public ID */
  512.         if (!doctype->public_missing) {
  513.                 public = c_string_from_hubbub_string(c, &doctype->public_id);
  514.                 if (public == NULL) {
  515.                         free(name);
  516.                         return HUBBUB_NOMEM;
  517.                 }
  518.         }
  519.  
  520.         /* May not have system ID */
  521.         if (!doctype->system_missing) {
  522.                 system = c_string_from_hubbub_string(c, &doctype->system_id);
  523.                 if (system == NULL) {
  524.                         free(public);
  525.                         free(name);
  526.                         return HUBBUB_NOMEM;
  527.                 }
  528.         }
  529.  
  530.         n = xmlNewDtd(c->document, BAD_CAST name,
  531.                         BAD_CAST (public ? public : ""),
  532.                         BAD_CAST (system ? system : ""));
  533.         if (n == NULL) {
  534.                 free(system);
  535.                 free(public);
  536.                 free(name);
  537.                 return HUBBUB_NOMEM;
  538.         }
  539.         /* Again, reference count must be 1 */
  540.         n->_private = (void *) (uintptr_t) 1;
  541.  
  542.         *result = (void *) n;
  543.  
  544.         free(system);
  545.         free(public);
  546.         free(name);
  547.  
  548.         return HUBBUB_OK;
  549. }
  550.  
  551. /**
  552.  * Create an element node
  553.  *
  554.  * \param ctx     Our context
  555.  * \param tag     Data for node
  556.  * \param result  Location to receive manufactured node
  557.  * \return HUBBUB_OK on success, appropriate error otherwise
  558.  *
  559.  * Postcondition: if successful, result's reference count must be 1.
  560.  */
  561. hubbub_error create_element(void *ctx, const hubbub_tag *tag, void **result)
  562. {
  563.         context *c = (context *) ctx;
  564.         char *name;
  565.         xmlNodePtr n;
  566.  
  567.         name = c_string_from_hubbub_string(c, &tag->name);
  568.         if (name == NULL)
  569.                 return HUBBUB_NOMEM;
  570.  
  571.         if (c->namespaces[0] != NULL) {
  572.                 n = xmlNewDocNode(c->document, c->namespaces[tag->ns - 1],
  573.                                 BAD_CAST name, NULL);
  574.         } else {
  575.                 n = xmlNewDocNode(c->document, NULL, BAD_CAST name, NULL);
  576.  
  577.                 /* We're creating the root node of the document. Therefore,
  578.                  * create the namespaces and set this node's namespace */
  579.                 if (n != NULL && c->namespaces[0] == NULL) {
  580.                         create_namespaces(c, (void *) n);
  581.  
  582.                         xmlSetNs(n, c->namespaces[tag->ns - 1]);
  583.                 }
  584.         }
  585.         if (n == NULL) {
  586.                 free(name);
  587.                 return HUBBUB_NOMEM;
  588.         }
  589.         /* Reference count must be 1 */
  590.         n->_private = (void *) (uintptr_t) 1;
  591.  
  592.         /* Attempt to add attributes to node */
  593.         if (tag->n_attributes > 0 && add_attributes(ctx, (void *) n,
  594.                         tag->attributes, tag->n_attributes) != 0) {
  595.                 xmlFreeNode(n);
  596.                 free(name);
  597.                 return HUBBUB_NOMEM;
  598.         }
  599.  
  600.         *result = (void *) n;
  601.  
  602.         free(name);
  603.  
  604.         return HUBBUB_OK;
  605. }
  606.  
  607. /**
  608.  * Create a text node
  609.  *
  610.  * \param ctx     Our context
  611.  * \param data    Node data
  612.  * \param result  Location to receive manufactured node
  613.  * \return HUBBUB_OK on success, appropriate error otherwise
  614.  *
  615.  * Postcondition: if successfult, result's reference count must be 1.
  616.  */
  617. hubbub_error create_text(void *ctx, const hubbub_string *data, void **result)
  618. {
  619.         context *c = (context *) ctx;
  620.         xmlNodePtr n;
  621.  
  622.         n = xmlNewDocTextLen(c->document, BAD_CAST data->ptr, (int) data->len);
  623.         if (n == NULL) {
  624.                 return HUBBUB_NOMEM;
  625.         }
  626.         /* Reference count must be 1 */
  627.         n->_private = (void *) (uintptr_t) 1;
  628.  
  629.         *result = (void *) n;
  630.  
  631.         return HUBBUB_OK;
  632. }
  633.  
  634. /**
  635.  * Increase a node's reference count
  636.  *
  637.  * \param ctx   Our context
  638.  * \param node  The node to reference
  639.  * \return HUBBUB_OK on success, appropriate error otherwise
  640.  */
  641. hubbub_error ref_node(void *ctx, void *node)
  642. {
  643.         context *c = (context *) ctx;
  644.  
  645.         if (node == c->document) {
  646.                 xmlDoc *n = (xmlDoc *) node;
  647.                 uintptr_t count = (uintptr_t) n->_private;
  648.  
  649.                 n->_private = (void *) ++count;
  650.         } else {
  651.                 xmlNode *n = (xmlNode *) node;
  652.                 uintptr_t count = (uintptr_t) n->_private;
  653.  
  654.                 n->_private = (void *) ++count;
  655.         }
  656.  
  657.         return HUBBUB_OK;
  658. }
  659.  
  660. /**
  661.  * Decrease a node's reference count
  662.  *
  663.  * \param ctx   Our context
  664.  * \param node  The node to unreference
  665.  * \return HUBBUB_OK on success, appropriate error otherwise
  666.  *
  667.  * Postcondition: If the node's reference count becomes zero, and it has no
  668.  * parent, and it is not the document node, then it is destroyed.
  669.  */
  670. hubbub_error unref_node(void *ctx, void *node)
  671. {
  672.         context *c = (context *) ctx;
  673.  
  674.         if (node == c->document) {
  675.                 xmlDoc *n = (xmlDoc *) node;
  676.                 uintptr_t count = (uintptr_t) n->_private;
  677.  
  678.                 /* Trap any attempt to unref a non-referenced node */
  679.                 assert(count != 0 && "Node has refcount of zero");
  680.  
  681.                 /* Never destroy document node */
  682.  
  683.                 n->_private = (void *) --count;
  684.         } else {
  685.                 xmlNode *n = (xmlNode *) node;
  686.                 uintptr_t count = (uintptr_t) n->_private;
  687.  
  688.                 /* Trap any attempt to unref a non-referenced node */
  689.                 assert(count != 0 && "Node has refcount of zero");
  690.  
  691.                 n->_private = (void *) --count;
  692.  
  693.                 /* Destroy node, if it has no parent */
  694.                 if (count == 0 && n->parent == NULL) {
  695.                         xmlFreeNode(n);
  696.                 }
  697.         }
  698.  
  699.         return HUBBUB_OK;
  700. }
  701.  
  702. /**
  703.  * Append a node to the end of another's child list
  704.  *
  705.  * \param ctx     Our context
  706.  * \param parent  The node to append to
  707.  * \param child   The node to append
  708.  * \param result  Location to receive appended node
  709.  * \return HUBBUB_OK on success, appropriate error otherwise
  710.  *
  711.  * Postcondition: if successful, result's reference count is increased by 1
  712.  *
  713.  * Important: *result may not == child (e.g. if text nodes got coalesced)
  714.  */
  715. hubbub_error append_child(void *ctx, void *parent, void *child, void **result)
  716. {
  717.         xmlNode *chld = (xmlNode *) child;
  718.         xmlNode *p = (xmlNode *) parent;
  719.  
  720.         /* Note: this does not exactly follow the current specification.
  721.          * See http://www.whatwg.org/specs/web-apps/current-work/ \
  722.          *     multipage/tree-construction.html#insert-a-character
  723.          * for the exact behaviour required.
  724.          */
  725.  
  726.         if (chld->type == XML_TEXT_NODE && p->last != NULL &&
  727.                         p->last->type == XML_TEXT_NODE) {
  728.                 /* Need to clone the child, as libxml will free it if it
  729.                  * merges the content with a pre-existing text node. */
  730.                 chld = xmlCopyNode(chld, 0);
  731.                 if (chld == NULL)
  732.                         return HUBBUB_NOMEM;
  733.  
  734.                 *result = xmlAddChild(p, chld);
  735.  
  736.                 assert(*result != (void *) chld);
  737.         } else {
  738.                 *result = xmlAddChild(p, chld);
  739.         }
  740.  
  741.         if (*result == NULL)
  742.                 return HUBBUB_NOMEM;
  743.  
  744.         ref_node(ctx, *result);
  745.  
  746.         return HUBBUB_OK;
  747. }
  748.  
  749. /**
  750.  * Insert a node into another's child list
  751.  *
  752.  * \param ctx        Our context
  753.  * \param parent     The node to insert into
  754.  * \param child      The node to insert
  755.  * \param ref_child  The node to insert before
  756.  * \param result     Location to receive inserted node
  757.  * \return HUBBUB_OK on success, appropriate error otherwise
  758.  *
  759.  * Postcondition: if successful, result's reference count is increased by 1
  760.  *
  761.  * Important: *result may not == child (e.g. if text nodes got coalesced)
  762.  */
  763. hubbub_error insert_before(void *ctx, void *parent, void *child, void *ref_child,
  764.                 void **result)
  765. {
  766.         xmlNode *chld = (xmlNode *) child;
  767.         xmlNode *ref = (xmlNode *) ref_child;
  768.  
  769.         if (chld->type == XML_TEXT_NODE && ref->prev != NULL &&
  770.                         ref->prev->type == XML_TEXT_NODE) {
  771.                 /* Clone text node, as it'll be freed by libxml */
  772.                 chld = xmlCopyNode(chld, 0);
  773.                 if (chld == NULL)
  774.                         return HUBBUB_NOMEM;
  775.  
  776.                 *result = xmlAddNextSibling(ref->prev, chld);
  777.  
  778.                 assert(*result != (void *) chld);
  779.         } else {
  780.                 *result = xmlAddPrevSibling(ref, chld);
  781.         }
  782.  
  783.         if (*result == NULL)
  784.                 return HUBBUB_NOMEM;
  785.  
  786.         ref_node(ctx, *result);
  787.  
  788.         return HUBBUB_OK;
  789. }
  790.  
  791. /**
  792.  * Remove a node from another's child list
  793.  *
  794.  * \param ctx     Our context
  795.  * \param parent  The node to remove from
  796.  * \param child   The node to remove
  797.  * \param result  Location to receive removed node
  798.  * \return HUBBUB_OK on success, appropriate error otherwise
  799.  *
  800.  * Postcondition: if successful, result's reference count is increased by 1
  801.  */
  802. hubbub_error remove_child(void *ctx, void *parent, void *child, void **result)
  803. {
  804.         xmlNode *chld = (xmlNode *) child;
  805.  
  806.         xmlUnlinkNode(chld);
  807.  
  808.         *result = child;
  809.  
  810.         ref_node(ctx, *result);
  811.  
  812.         return HUBBUB_OK;
  813. }
  814.  
  815. /**
  816.  * Clone a node
  817.  *
  818.  * \param ctx     Our context
  819.  * \param node    The node to clone
  820.  * \param deep    True to clone entire subtree, false to clone only the node
  821.  * \param result  Location to receive clone
  822.  * \return HUBBUB_OK on success, appropriate error otherwise
  823.  *
  824.  * Postcondition: if successful, result's reference count must be 1.
  825.  */
  826. hubbub_error clone_node(void *ctx, void *node, bool deep, void **result)
  827. {
  828.         xmlNode *n = (xmlNode *) node;
  829.  
  830.         *result = xmlCopyNode(n, deep ? 1 : 2);
  831.  
  832.         if (*result == NULL)
  833.                 return HUBBUB_NOMEM;
  834.  
  835.         ((xmlNode *)(*result))->_private = (void *) (uintptr_t) 1;
  836.  
  837.         return HUBBUB_OK;
  838. }
  839.  
  840. /**
  841.  * Move all the children of one node to another
  842.  *
  843.  * \param ctx         Our context
  844.  * \param node        The initial parent node
  845.  * \param new_parent  The new parent node
  846.  * \return HUBBUB_OK on success, appropriate error otherwise
  847.  */
  848. hubbub_error reparent_children(void *ctx, void *node, void *new_parent)
  849. {
  850.         xmlNode *n = (xmlNode *) node;
  851.         xmlNode *p = (xmlNode *) new_parent;
  852.         xmlNode *child;
  853.  
  854.         for (child = n->children; child != NULL; ) {
  855.                 xmlNode *next = child->next;
  856.  
  857.                 xmlUnlinkNode(child);
  858.  
  859.                 if (xmlAddChild(p, child) == NULL)
  860.                         return HUBBUB_NOMEM;
  861.  
  862.                 child = next;
  863.         }
  864.  
  865.         return HUBBUB_OK;
  866. }
  867.  
  868. /**
  869.  * Retrieve the parent of a node
  870.  *
  871.  * \param ctx           Our context
  872.  * \param node          Node to retrieve the parent of
  873.  * \param element_only  True if the parent must be an element, false otherwise
  874.  * \param result        Location to receive parent node
  875.  * \return HUBBUB_OK on success, appropriate error otherwise
  876.  *
  877.  * Postcondition: if there is a parent, then result's reference count must be
  878.  * increased.
  879.  */
  880. hubbub_error get_parent(void *ctx, void *node, bool element_only, void **result)
  881. {
  882.         xmlNode *n = (xmlNode *) node;
  883.  
  884.         *result = (void *) n->parent;
  885.  
  886.         if (*result != NULL && element_only &&
  887.                         ((xmlNode *) *result)->type != XML_ELEMENT_NODE) {
  888.                 *result = NULL;
  889.         }
  890.  
  891.         if (*result != NULL)
  892.                 ref_node(ctx, *result);
  893.  
  894.         return HUBBUB_OK;
  895. }
  896.  
  897. /**
  898.  * Determine if a node has children
  899.  *
  900.  * \param ctx     Our context
  901.  * \param node    The node to inspect
  902.  * \param result  Location to receive result
  903.  * \return HUBBUB_OK on success, appropriate error otherwise
  904.  */
  905. hubbub_error has_children(void *ctx, void *node, bool *result)
  906. {
  907.         xmlNode *n = (xmlNode *) node;
  908.  
  909.         *result = n->children != NULL;
  910.  
  911.         return HUBBUB_OK;
  912. }
  913.  
  914. /**
  915.  * Associate a node with a form
  916.  *
  917.  * \param ctx   Our context
  918.  * \param form  The form to associate with
  919.  * \param node  The node to associate
  920.  * \return HUBBUB_OK on success, appropriate error otherwise
  921.  */
  922. hubbub_error form_associate(void *ctx, void *form, void *node)
  923. {
  924.         /* In this implementation, we do nothing here.
  925.          *
  926.          * If we wish to process forms afterwards, then we would want to use
  927.          * this entry point to associate inputs with form elements. This is
  928.          * useful because forms may be misnested in the source data and thus
  929.          * it is not necessarily sufficient to search the resultant DOM to
  930.          * perform the association.
  931.          *
  932.          * Note that this callback will be called even if the node has
  933.          * an @form. In that case, the association should be between the node
  934.          * and the form identified by the ID in @form. This may not be the same
  935.          * as the form passed in.
  936.          */
  937.         return HUBBUB_OK;
  938. }
  939.  
  940. /**
  941.  * Add attributes to a node
  942.  *
  943.  * \param ctx           Our context
  944.  * \param node          The node to add to
  945.  * \param attributes    Array of attributes to add
  946.  * \param n_attributes  Number of entries in array
  947.  * \return HUBBUB_OK on success, appropriate error otherwise
  948.  */
  949. hubbub_error add_attributes(void *ctx, void *node,
  950.                 const hubbub_attribute *attributes, uint32_t n_attributes)
  951. {
  952.         context *c = (context *) ctx;
  953.         xmlNode *n = (xmlNode *) node;
  954.         uint32_t attr;
  955.  
  956.         for (attr = 0; attr < n_attributes; attr++) {
  957.                 xmlAttr *prop;
  958.                 char *name, *value;
  959.  
  960.                 name = c_string_from_hubbub_string(c, &attributes[attr].name);
  961.                 if (name == NULL)
  962.                         return HUBBUB_NOMEM;
  963.  
  964.                 value = c_string_from_hubbub_string(c, &attributes[attr].value);
  965.                 if (value == NULL) {
  966.                         free(name);
  967.                         return HUBBUB_NOMEM;
  968.                 }
  969.  
  970.                 if (attributes[attr].ns != HUBBUB_NS_NULL &&
  971.                                 c->namespaces[0] != NULL) {
  972.                         prop = xmlNewNsProp(n,
  973.                                         c->namespaces[attributes[attr].ns - 1],
  974.                                         BAD_CAST name, BAD_CAST value);
  975.                 } else {
  976.                         prop = xmlNewProp(n, BAD_CAST name, BAD_CAST value);
  977.                 }
  978.                 if (prop == NULL) {
  979.                         free(value);
  980.                         free(name);
  981.                         return HUBBUB_NOMEM;
  982.                 }
  983.  
  984.                 free(value);
  985.                 free(name);
  986.         }
  987.  
  988.         return HUBBUB_OK;
  989. }
  990.  
  991. /**
  992.  * Notification of the quirks mode of a document
  993.  *
  994.  * \param ctx   Our context
  995.  * \param mode  The quirks mode
  996.  * \return HUBBUB_OK on success, appropriate error otherwise
  997.  */
  998. hubbub_error set_quirks_mode(void *ctx, hubbub_quirks_mode mode)
  999. {
  1000.         /* In this implementation, we do nothing.
  1001.          *
  1002.          * The quirks mode is really only of any use when applying CSS
  1003.          * to the resulting DOM tree.
  1004.          */
  1005.         return HUBBUB_OK;
  1006. }
  1007.  
  1008. /**
  1009.  * Notification that a potential encoding change is required
  1010.  *
  1011.  * \param ctx      Our context
  1012.  * \param charset  The new charset for the source data
  1013.  * \return HUBBUB_OK to continue using the current input handler,
  1014.  *         HUBBUB_ENCODINGCHANGE to stop processing immediately and
  1015.  *                               return control to the client,
  1016.  *         appropriate error otherwise.
  1017.  */
  1018. hubbub_error change_encoding(void *ctx, const char *charset)
  1019. {
  1020.         context *c = (context *) ctx;
  1021.         uint32_t source;
  1022.         const char *name;
  1023.  
  1024.         /* If we have an encoding here, it means we are *certain* */
  1025.         if (c->encoding != NULL) {
  1026.                 return HUBBUB_OK;
  1027.         }
  1028.  
  1029.         /* Find the confidence otherwise (can only be from a BOM) */
  1030.         name = hubbub_parser_read_charset(c->parser, &source);
  1031.  
  1032.         if (source == HUBBUB_CHARSET_CONFIDENT) {
  1033.                 c->enc_source = ENCODING_SOURCE_DETECTED;
  1034.                 c->encoding = (char *) charset;
  1035.                 return HUBBUB_OK;
  1036.         }
  1037.  
  1038.         /* So here we have something of confidence tentative... */
  1039.         /* http://www.whatwg.org/specs/web-apps/current-work/#change */
  1040.  
  1041.         /* 2. "If the new encoding is identical or equivalent to the encoding
  1042.          * that is already being used to interpret the input stream, then set
  1043.          * the confidence to confident and abort these steps." */
  1044.  
  1045.         /* Whatever happens, the encoding should be set here; either for
  1046.          * reprocessing with a different charset, or for confirming that the
  1047.          * charset is in fact correct */
  1048.         c->encoding = charset;
  1049.         c->enc_source = ENCODING_SOURCE_META;
  1050.  
  1051.         /* Equal encodings will have the same string pointers */
  1052.         return (charset == name) ? HUBBUB_OK : HUBBUB_ENCODINGCHANGE;
  1053. }
  1054.  
  1055.