Subversion Repositories Kolibri OS

Rev

Rev 7057 | Rev 7084 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. #include <string.h>
  2. #include <stdio.h>
  3. #include <errno.h>
  4.  
  /* First argument of every xxx_mbtowc / xxx_wctomb converter; iconv() in this
     file always passes 0 for it. */
  typedef int conv_t;
  /* Wide-character value handed from a decoder (xxx_mbtowc) to an encoder
     (xxx_wctomb). */
  typedef unsigned int ucs4_t;
  /* Conversion descriptor returned by iconv_open(): the target encoding id is
     stored in the upper 16 bits and the source encoding id in the lower 16
     bits; -1 signals failure. */
  typedef int iconv_t;

  /* Return code if invalid input after a shift sequence of n bytes was read.
     (xxx_mbtowc)  Always an odd negative number. */
  #define RET_SHIFT_ILSEQ(n)  (-1-2*(n))
  /* Return code if invalid. (xxx_mbtowc) */
  #define RET_ILSEQ           RET_SHIFT_ILSEQ(0)
  /* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc)
     Always an even negative number. */
  #define RET_TOOFEW(n)       (-2-2*(n))

  /* Return code if invalid. (xxx_wctomb) */
  #define RET_ILUNI      (-1)
  /* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
  #define RET_TOOSMALL   (-2)

  /* Encoding identifiers.  Each one must fit in 16 bits because iconv_open()
     packs two of them into a single iconv_t. */
  #define CP866 0
  #define CP1251 1
  #define CP1252 2
  #define KOI8_RU 3
  #define ISO8859_5 4
  #define UTF_8 5
  28.  
  29. #include "cp866.h"
  30. #include "cp1251.h"
  31. #include "cp1252.h"
  32. #include "koi8_ru.h"
  33. #include "iso8859_5.h"
  34. #include "utf8.h"
  35.  
  36. int encoding(const char *someencoding) {
  37.  
  38.         char *what = strdup(someencoding);
  39.         /* Ignore //TRANSLIT or //IGNORE for now. */
  40.         int i;
  41.         for(i = 0; i < strlen(what); i++) {
  42.           if(what[i] == '/') {
  43.                 what[i] = '\0';
  44.                 break;
  45.           }
  46.         }
  47.  
  48.         if (!strcasecmp(what,"CP866")) return CP866;
  49.         if (!strcasecmp(what,"CP1251")) return CP1251;
  50.         if (!strcasecmp(what,"windows-1251")) return CP1251;
  51.         if (!strcasecmp(what,"windows-1252")) return CP1252;
  52.         if (!strcasecmp(what,"CP1252")) return CP1252;
  53.         if (!strcasecmp(what,"KOI8-RU")) return KOI8_RU;
  54.         if (!strcasecmp(what,"KOI8-R")) return KOI8_RU;
  55.         if (!strcasecmp(what,"ISO8859-5")) return ISO8859_5;
  56.         if (!strcasecmp(what,"UTF-8")) return UTF_8;
  57.         return -1;
  58. }
  59.  
  60. iconv_t iconv_open(const char *tocode, const char *fromcode) {
  61.         int to, from;
  62.  
  63.         if ((to=encoding(tocode))==-1) return -1;
  64.         if ((from=encoding(fromcode))==-1) return -1;
  65.  
  66.         to=to<<16&0xFFFF0000;
  67.         from=from&0xFFFF;
  68.  
  69.         return to+from;
  70. }
  71.  
  72. int iconv_close(iconv_t icd)
  73. {
  74.   return 0;
  75. }
  76.  
  77. size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
  78.         char **outbuf, size_t *outbytesleft)
  79. {
  80.         int n, to, from;
  81.         size_t count1,count2;
  82.         unsigned int pwc;
  83.         int converted,written;
  84.         int (*mbtowc)(conv_t, ucs4_t *, const unsigned char *, int);
  85.         int (*wctomb)(conv_t, unsigned char *, ucs4_t, int);
  86.  
  87.         to=cd>>16;
  88.         from=cd&0xFFFF;
  89.  
  90.         switch (from)
  91.         {
  92.                 case CP866: mbtowc=cp866_mbtowc; break;
  93.                 case CP1251: mbtowc=cp1251_mbtowc; break;
  94.                 case CP1252: mbtowc=cp1252_mbtowc; break;
  95.                 case ISO8859_5: mbtowc=iso8859_5_mbtowc; break;
  96.                 case KOI8_RU: mbtowc=koi8_ru_mbtowc; break;
  97.                 case UTF_8: mbtowc=utf8_mbtowc; break;
  98.                 default: return (size_t)-1;
  99.         }
  100.  
  101.         switch (to)
  102.         {
  103.                 case CP866: wctomb=cp866_wctomb; break;
  104.                 case CP1251: wctomb=cp1251_wctomb; break;
  105.                 case CP1252: wctomb=cp1252_wctomb; break;
  106.                 case ISO8859_5: wctomb=iso8859_5_wctomb; break;
  107.                 case KOI8_RU: wctomb=koi8_ru_wctomb; break;
  108.                 case UTF_8: wctomb=utf8_wctomb; break;
  109.                 default: return (size_t)-1;
  110.         }
  111.  
  112.         count1=0;
  113.         count2=0;
  114.  
  115.     /* Convert input multibyte char to wide character by using calls to mbtowc */
  116.     /* Convert wide character to multibyte by calls to wctomb */
  117.     /* Handle errors as we go on converting to be as standard compliant as possible */
  118.     while(count1 < *inbytesleft) {
  119.       unsigned char mbholder[] = { 0,0,0,0,0,0 };
  120.  
  121.       int numbytes = (mbtowc)(0, &pwc,((*inbuf)+count1), *inbytesleft - count1);
  122.       if(numbytes < 0) {
  123.         /* errno = EILSEQ if invalid multibyte sequence encountered in input */
  124.         /* errno = EINVAL if input ends in the middle of a multibyte sequence */
  125.  
  126.         switch(numbytes) {
  127.           case RET_TOOFEW(0):
  128.             errno = EINVAL;
  129.             break;
  130.  
  131.           case RET_ILSEQ:
  132.             errno = EILSEQ;
  133.             break;
  134.         }
  135.  
  136.         *inbytesleft -= count1;
  137.         *outbytesleft -= count2;
  138.         *inbuf += count1;
  139.         *outbuf += count2;
  140.         return (size_t) -1;
  141.       }
  142.  
  143.       /* Convert from wide to multibyte storing result in mbholder and num converted in numbytes2 */
  144.       /* Pass the minimum amount of space we have, one from mbholder and one from remaining in outbuf */
  145.       int minspace = sizeof(mbholder) <= (*outbytesleft - count2) ? sizeof(mbholder) : (*outbytesleft - count2);
  146.  
  147.       int numbytes2 = (wctomb)(0, &mbholder[0], pwc, minspace);
  148.       if(numbytes2 < 0) {
  149.         switch(numbytes2) {
  150.           case RET_ILUNI:
  151.             errno = EILSEQ;
  152.             break;
  153.           case RET_TOOSMALL:
  154.             errno = E2BIG;
  155.             break;
  156.         }
  157.  
  158.         *inbytesleft -= count1;
  159.         *outbytesleft -= count2;
  160.         *inbuf += count1;
  161.         *outbuf += count2;
  162.  
  163.         return (size_t) -1;
  164.       }
  165.  
  166.       int i;
  167.       for(i = 0; i < numbytes2; i++) {
  168.         *(*outbuf + count2 + i) = mbholder[i];
  169.       }
  170.  
  171.       count1+=numbytes;
  172.       count2+=numbytes2;
  173.     }
  174.  
  175.     /* Successfully converted everything, update the variables and return number of bytes converted */
  176.     *inbytesleft -= count1;
  177.     *outbytesleft -= count2;
  178.     *inbuf += count1;
  179.     *outbuf += count2;
  180.  
  181.     return count1;
  182. }
  183.  
  184. /* int main() */
  185. /* { */
  186. /*      char *s;// ="вертолет"; */
  187. /*      char *z; */
  188. /*      //unsigned int pwc; */
  189. /*      iconv_t cd; */
  190. /*      size_t in, out; */
  191.  
  192. /*     FILE *infile; */
  193. /*     char *fname = "file3.txt"; */
  194.  
  195. /*     size_t testmax = 100; */
  196. /*     size_t test = 0; */
  197.  
  198. /*     infile = fopen(fname,"r"); */
  199.  
  200. /*      fseek(infile, 0, SEEK_END); */
  201. /*      size_t file_size = ftell(infile); */
  202. /*      rewind(infile); */
  203.  
  204. /*      char *buffer = (char*)malloc(file_size * sizeof(char)); */
  205. /*      if (buffer == NULL) */
  206. /*      { */
  207. /*              fclose(infile); */
  208. /*              printf("Error allocating %d bytes.\n", file_size * sizeof(char)); */
  209. /*              return -1; */
  210. /*      } */
  211. /*      size_t bytes_read = fread(buffer, sizeof(char), file_size, infile); */
  212. /*      if (bytes_read != file_size) */
  213. /*      { */
  214. /*              /\* printf("Have read only %d bytes of %d.\n", bytes_read, file_size); *\/ */
  215. /*              free(buffer); */
  216. /*              fclose(infile); */
  217. /*              return -1; */
  218. /*      } */
  219.  
  220. /*      /\* in=strlen(buffer); *\/ */
  221. /*     in = bytes_read; */
  222. /*      z=malloc(in+12000); */
  223.  
  224. /*      out=in-1000; */
  225. /*      cd=iconv_open("UTF-8","UTF-8"); */
  226. /* //   printf("%x\n",cd); */
  227. /*      int t; */
  228. /*     char *zor = z; */
  229.  
  230. /*     /\* for(t = 0; t < 27400; t++) *\/ */
  231. /*     /\*    printf("0x%x,", buffer[t]); *\/ */
  232.  
  233. /*      t=iconv(cd, &buffer, &in, &z, &out); */
  234. /*      /\* printf("\nResult after iconv(): %d", t); *\/ */
  235.  
  236. /*     /\* for(t = 0; t < 24259; t++) *\/ */
  237. /*     /\*   printf("%c", zor[t]); *\/ */
  238.  
  239. /*      //for (;s<s+strlen(s);s++) {cp866_mbtowc (0,  &pwc, s, 1);printf("%c=%u\n",*s,pwc);} */
  240. /* } */
  241.  
  242. /* typedef struct */
  243. /* { */
  244. /*      char *name; */
  245. /*      void *f; */
  246. /* } export_t; */
  247.  
  248. /* char szStart[]           = "START"; */
  249. /* char szVersion[]         = "version"; */
  250. /* char sziconv_open[]    = "iconv_open"; */
  251. /* char sziconv[]   = "iconv"; */
  252.  
  253. /* export_t EXPORTS[] __asm__("EXPORTS") = */
  254. /* { */
  255. /*      { szStart,       (void*)0x0 }, */
  256. /*      { szVersion,     (void*)0x00010001 }, */
  257. /*      { sziconv_open,  iconv_open    }, */
  258. /*      { sziconv,       iconv   }, */
  259. /*      { NULL,          NULL }, */
  260. /* }; */
  261.