Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * This file is part of LibParserUtils.
  3.  * Licensed under the MIT License,
  4.  *                http://www.opensource.org/licenses/mit-license.php
  5.  * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
  6.  */
  7.  
  8. #ifndef parserutils_charset_codec_h_
  9. #define parserutils_charset_codec_h_
  10.  
  11. #ifdef __cplusplus
  12. extern "C"
  13. {
  14. #endif
  15.  
  16. #include <inttypes.h>
  17.  
  18. #include <parserutils/errors.h>
  19. #include <parserutils/functypes.h>
  20.  
  21. typedef struct parserutils_charset_codec parserutils_charset_codec;
  22.  
  23. #define PARSERUTILS_CHARSET_CODEC_NULL (0xffffffffU)
  24.  
  25. /**
  26.  * Charset codec error mode
  27.  *
  28.  * A codec's error mode determines its behaviour in the face of:
  29.  *
  30.  * + characters which are unrepresentable in the destination charset (if
  31.  *   encoding data) or which cannot be converted to UCS-4 (if decoding data).
  32.  * + invalid byte sequences (both encoding and decoding)
  33.  *
  34.  * The options provide a choice between the following approaches:
  35.  *
  36.  * + draconian, "stop processing" ("strict")
  37.  * + "replace the unrepresentable character with something else" ("loose")
  38.  * + "attempt to transliterate, or replace if unable" ("translit")
  39.  *
  40.  * The default error mode is "loose".
  41.  *
  42.  *
  43.  * In the "loose" case, the replacement character will depend upon:
  44.  *
  45.  * + Whether the operation was encoding or decoding
  46.  * + If encoding, what the destination charset is.
  47.  *
  48.  * If decoding, the replacement character will be:
  49.  *
  50.  *     U+FFFD (REPLACEMENT CHARACTER)
  51.  *
  52.  * If encoding, the replacement character will be:
  53.  *
  54.  *     U+003F (QUESTION MARK) if the destination charset is not UTF-(8|16|32)
  55.  *     U+FFFD (REPLACEMENT CHARACTER) otherwise.
  56.  *
  57.  *
  58.  * In the "translit" case, the codec will attempt to transliterate into
  59.  * the destination charset, if encoding. If decoding, or if transliteration
  60.  * fails, this option is identical to "loose".
  61.  */
  62. typedef enum parserutils_charset_codec_errormode {
  63.         /** Abort processing if unrepresentable character encountered */
  64.         PARSERUTILS_CHARSET_CODEC_ERROR_STRICT   = 0,
  65.         /** Replace unrepresentable characters with single alternate */
  66.         PARSERUTILS_CHARSET_CODEC_ERROR_LOOSE    = 1,
  67.         /** Transliterate unrepresentable characters, if possible */
  68.         PARSERUTILS_CHARSET_CODEC_ERROR_TRANSLIT = 2
  69. } parserutils_charset_codec_errormode;
  70.  
  71. /**
  72.  * Charset codec option types
  73.  */
  74. typedef enum parserutils_charset_codec_opttype {
  75.         /** Set codec error mode */
  76.         PARSERUTILS_CHARSET_CODEC_ERROR_MODE  = 1
  77. } parserutils_charset_codec_opttype;
  78.  
  79. /**
  80.  * Charset codec option parameters
  81.  */
  82. typedef union parserutils_charset_codec_optparams {
  83.         /** Parameters for error mode setting */
  84.         struct {
  85.                 /** The desired error handling mode */
  86.                 parserutils_charset_codec_errormode mode;
  87.         } error_mode;
  88. } parserutils_charset_codec_optparams;
  89.  
  90.  
  91. /* Create a charset codec */
  92. parserutils_error parserutils_charset_codec_create(const char *charset,
  93.                 parserutils_alloc alloc, void *pw,
  94.                 parserutils_charset_codec **codec);
  95. /* Destroy a charset codec */
  96. parserutils_error parserutils_charset_codec_destroy(
  97.                 parserutils_charset_codec *codec);
  98.  
  99. /* Configure a charset codec */
  100. parserutils_error parserutils_charset_codec_setopt(
  101.                 parserutils_charset_codec *codec,
  102.                 parserutils_charset_codec_opttype type,
  103.                 parserutils_charset_codec_optparams *params);
  104.  
  105. /* Encode a chunk of UCS-4 data into a codec's charset */
  106. parserutils_error parserutils_charset_codec_encode(
  107.                 parserutils_charset_codec *codec,
  108.                 const uint8_t **source, size_t *sourcelen,
  109.                 uint8_t **dest, size_t *destlen);
  110.  
  111. /* Decode a chunk of data in a codec's charset into UCS-4 */
  112. parserutils_error parserutils_charset_codec_decode(
  113.                 parserutils_charset_codec *codec,
  114.                 const uint8_t **source, size_t *sourcelen,
  115.                 uint8_t **dest, size_t *destlen);
  116.  
  117. /* Reset a charset codec */
  118. parserutils_error parserutils_charset_codec_reset(
  119.                 parserutils_charset_codec *codec);
  120.  
  121. #ifdef __cplusplus
  122. }
  123. #endif
  124.  
  125. #endif
  126.