Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * This file is part of LibParserUtils.
  3.  * Licensed under the MIT License,
  4.  *                http://www.opensource.org/licenses/mit-license.php
  5.  * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
  6.  */
  7.  
  8. #ifndef parserutils_input_inputstream_h_
  9. #define parserutils_input_inputstream_h_
  10.  
  11. #ifdef __cplusplus
  12. extern "C"
  13. {
  14. #endif
  15.  
  16. #include <stdbool.h>
  17. #ifndef NDEBUG
  18. #include <stdio.h>
  19. #endif
  20. #include <stdlib.h>
  21. #include <inttypes.h>
  22.  
  23. #include <parserutils/errors.h>
  24. #include <parserutils/functypes.h>
  25. #include <parserutils/types.h>
  26. #include <parserutils/charset/utf8.h>
  27. #include <parserutils/utils/buffer.h>
  28.  
  29. /**
  30.  * Type of charset detection function
  31.  */
  32. typedef parserutils_error (*parserutils_charset_detect_func)(
  33.                 const uint8_t *data, size_t len,
  34.                 uint16_t *mibenum, uint32_t *source);
  35.  
  36. /**
  37.  * Input stream object
  38.  */
  39. typedef struct parserutils_inputstream
  40. {
  41.         parserutils_buffer *utf8;       /**< Buffer containing UTF-8 data */
  42.  
  43.         uint32_t cursor;                /**< Byte offset of current position */
  44.  
  45.         bool had_eof;                   /**< Whether EOF has been reached */
  46. } parserutils_inputstream;
  47.  
  48. /* Create an input stream */
  49. parserutils_error parserutils_inputstream_create(const char *enc,
  50.                 uint32_t encsrc, parserutils_charset_detect_func csdetect,
  51.                 parserutils_alloc alloc, void *pw,
  52.                 parserutils_inputstream **stream);
  53. /* Destroy an input stream */
  54. parserutils_error parserutils_inputstream_destroy(
  55.                 parserutils_inputstream *stream);
  56.  
  57. /* Append data to an input stream */
  58. parserutils_error parserutils_inputstream_append(
  59.                 parserutils_inputstream *stream,
  60.                 const uint8_t *data, size_t len);
  61. /* Insert data into stream at current location */
  62. parserutils_error parserutils_inputstream_insert(
  63.                 parserutils_inputstream *stream,
  64.                 const uint8_t *data, size_t len);
  65.  
  66. /* Slow form of css_inputstream_peek. */
  67. parserutils_error parserutils_inputstream_peek_slow(
  68.                 parserutils_inputstream *stream,
  69.                 size_t offset, const uint8_t **ptr, size_t *length);
  70.  
  71. /**
  72.  * Look at the character in the stream that starts at
  73.  * offset bytes from the cursor
  74.  *
  75.  * \param stream  Stream to look in
  76.  * \param offset  Byte offset of start of character
  77.  * \param ptr     Pointer to location to receive pointer to character data
  78.  * \param length  Pointer to location to receive character length (in bytes)
  79.  * \return PARSERUTILS_OK on success,
  80.  *                    _NEEDDATA on reaching the end of available input,
  81.  *                    _EOF on reaching the end of all input,
  82.  *                    _BADENCODING if the input cannot be decoded,
  83.  *                    _NOMEM on memory exhaustion,
  84.  *                    _BADPARM if bad parameters are passed.
  85.  *
  86.  * Once the character pointed to by the result of this call has been advanced
  87.  * past (i.e. parserutils_inputstream_advance has caused the stream cursor to
  88.  * pass over the character), then no guarantee is made as to the validity of
  89.  * the data pointed to. Thus, any attempt to dereference the pointer after
  90.  * advancing past the data it points to is a bug.
  91.  */
  92. static inline parserutils_error parserutils_inputstream_peek(
  93.                 parserutils_inputstream *stream, size_t offset,
  94.                 const uint8_t **ptr, size_t *length)
  95. {
  96.         parserutils_error error = PARSERUTILS_OK;
  97.         const parserutils_buffer *utf8;
  98.         const uint8_t *utf8_data;
  99.         size_t len, off, utf8_len;
  100.  
  101.         if (stream == NULL || ptr == NULL || length == NULL)
  102.                 return PARSERUTILS_BADPARM;
  103.  
  104. #ifndef NDEBUG
  105. #ifdef VERBOSE_INPUTSTREAM
  106.         fprintf(stdout, "Peek: len: %zu cur: %u off: %zu\n",
  107.                         stream->utf8->length, stream->cursor, offset);
  108. #endif
  109. #ifdef RANDOMISE_INPUTSTREAM
  110.         parserutils_buffer_randomise(stream->utf8);
  111. #endif
  112. #endif
  113.  
  114.         utf8 = stream->utf8;
  115.         utf8_data = utf8->data;
  116.         utf8_len = utf8->length;
  117.         off = stream->cursor + offset;
  118.  
  119. #define IS_ASCII(x) (((x) & 0x80) == 0)
  120.  
  121.         if (off < utf8_len) {
  122.                 if (IS_ASCII(utf8_data[off])) {
  123.                         /* Early exit for ASCII case */
  124.                         (*length) = 1;
  125.                         (*ptr) = (utf8_data + off);
  126.                         return PARSERUTILS_OK;
  127.                 } else {
  128.                         error = parserutils_charset_utf8_char_byte_length(
  129.                                 utf8_data + off, &len);
  130.  
  131.                         if (error == PARSERUTILS_OK) {
  132.                                 (*length) = len;
  133.                                 (*ptr) = (utf8_data + off);
  134.                                 return PARSERUTILS_OK;
  135.                         } else if (error != PARSERUTILS_NEEDDATA) {
  136.                                 return error;
  137.                         }
  138.                 }
  139.         }
  140.  
  141. #undef IS_ASCII
  142.  
  143.         if (off != utf8_len && error != PARSERUTILS_NEEDDATA)
  144.                 abort();
  145.  
  146.         return parserutils_inputstream_peek_slow(stream, offset, ptr, length);
  147. }
  148.  
  149. /**
  150.  * Advance the stream's current position
  151.  *
  152.  * \param stream  The stream whose position to advance
  153.  * \param bytes   The number of bytes to advance
  154.  */
  155. static inline void parserutils_inputstream_advance(
  156.                 parserutils_inputstream *stream, size_t bytes)
  157. {
  158.         if (stream == NULL)
  159.                 return;
  160.  
  161. #if !defined(NDEBUG) && defined(VERBOSE_INPUTSTREAM)
  162.         fprintf(stdout, "Advance: len: %zu cur: %u bytes: %zu\n",
  163.                         stream->utf8->length, stream->cursor, bytes);
  164. #endif
  165.  
  166.         if (bytes > stream->utf8->length - stream->cursor)
  167.                 abort();
  168.  
  169.         if (stream->cursor == stream->utf8->length)
  170.                 return;
  171.  
  172.         stream->cursor += bytes;
  173. }
  174.  
  175. /* Read the document charset */
  176. const char *parserutils_inputstream_read_charset(
  177.                 parserutils_inputstream *stream, uint32_t *source);
  178. /* Change the document charset */
  179. parserutils_error parserutils_inputstream_change_charset(
  180.                 parserutils_inputstream *stream,
  181.                 const char *enc, uint32_t source);
  182.  
  183. #ifdef __cplusplus
  184. }
  185. #endif
  186.  
  187. #endif
  188.  
  189.