Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. #ifndef UTF8_UTIL_H
  2. #define UTF8_UTIL_H
  3. /**
  4.  * UTF-8 utility functions
  5.  *
  6.  * (c) 2010 Steve Bennett <steveb@workware.net.au>
  7.  *
  8.  * See LICENCE for licence details.
  9.  */
  10.  
  11. #ifdef __cplusplus
  12. extern "C" {
  13. #endif
  14.  
  15. #ifndef USE_UTF8
  16. #include <ctype.h>
  17.  
  18. /* No utf-8 support: 1 byte = 1 char. NOTE: utf8_strlen expands to strlen() but only <ctype.h> is included here, so the includer must provide <string.h>. */
  19. #define utf8_strlen(S, B) ((B) < 0 ? (int)strlen(S) : (B)) /* B is evaluated twice; B < 0 means "measure S with strlen()" */
  20. #define utf8_tounicode(S, CP) (*(CP) = (unsigned char)*(S), 1) /* stores the first byte of S in *CP; always consumes 1 byte */
  21. #define utf8_index(C, I) (I) /* char index == byte index; C is not evaluated */
  22. #define utf8_charlen(C) 1 /* every char is 1 byte; C is not evaluated */
  23.  
  24. #else
  25. /**
  26.  * Encodes the given unicode codepoint (0 - 0xffff) as utf-8
  27.  * and stores the resulting bytes at 'p'.
  28.  *
  29.  * Returns the number of bytes in the utf-8 sequence (1-3).
  30.  */
  31. int utf8_fromunicode(char *p, unsigned short uc);
  32.  
  33. /**
  34.  * Returns the length in bytes of the utf-8 sequence whose first byte is 'c'.
  35.  *
  36.  * Returns 1-4, or -1 if 'c' is not a valid start byte.
  37.  *
  38.  * Note that charlen=4 is not supported by the rest of the API.
  39.  */
  40. int utf8_charlen(int c);
  41.  
  42. /**
  43.  * Returns the number of characters in the utf-8
  44.  * string 'str' of the given byte length 'bytelen'.
  45.  *
  46.  * Any bytes which are not part of a valid utf-8
  47.  * sequence are treated as individual characters.
  48.  *
  49.  * The string *must* be null terminated.
  50.  *
  51.  * Does not support unicode code points > \uffff
  52.  */
  53. int utf8_strlen(const char *str, int bytelen);
  54.  
  55. /**
  56.  * Returns the byte index of character number 'charindex' in the utf-8 string 'str'.
  57.  *
  58.  * The string *must* be null terminated.
  59.  *
  60.  * Passing the character length of the string as 'charindex'
  61.  * will return the byte length of the string.
  62.  */
  63. int utf8_index(const char *str, int charindex);
  64.  
  65. /**
  66.  * Decodes the utf-8 sequence at the start of 'str' into
  67.  * the corresponding unicode codepoint.
  68.  *
  69.  * Stores the codepoint in *uc and returns the number of bytes
  70.  * consumed.
  71.  *
  72.  * If 'str' is null terminated, then an invalid utf-8 sequence
  73.  * at the end of the string will be returned as individual bytes.
  74.  *
  75.  * If it is not null terminated, the caller *must* check the length first.
  76.  *
  77.  * Does not support unicode code points > \uffff
  78.  */
  79. int utf8_tounicode(const char *str, int *uc);
  80.  
  81. #endif
  82.  
  83. #ifdef __cplusplus
  84. }
  85. #endif
  86.  
  87.  
  88.  
  89. #endif
  90.