Subversion Repositories Kolibri OS

Rev

Rev 7045 | Rev 7079 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. #include <string.h>
  2. #include <stdio.h>
  3. #include <errno.h>
  4.  
  5. typedef int conv_t;
  6. typedef unsigned int ucs4_t;
  7. typedef int iconv_t;
  8.  
  9. /* Return code if invalid input after a shift sequence of n bytes was read.
  10.    (xxx_mbtowc) */
  11. #define RET_SHIFT_ILSEQ(n)  (-1-2*(n))
  12. /* Return code if invalid. (xxx_mbtowc) */
  13. #define RET_ILSEQ           RET_SHIFT_ILSEQ(0)
  14. /* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */
  15. #define RET_TOOFEW(n)       (-2-2*(n))
  16.  
  17. /* Return code if invalid. (xxx_wctomb) */
  18. #define RET_ILUNI      -1
  19. /* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
  20. #define RET_TOOSMALL   -2
  21.  
  22. #define CP866 0
  23. #define CP1251 1
  24. #define CP1252 2
  25. #define KOI8_RU 3
  26. #define ISO8859_5 4
  27. #define UTF_8 5
  28.  
  29. #include "cp866.h"
  30. #include "cp1251.h"
  31. #include "cp1252.h"
  32. #include "koi8_ru.h"
  33. #include "iso8859_5.h"
  34. #include "utf8.h"
  35.  
  36. int encoding(const char *someencoding) {
  37.  
  38.         char *what = strdup(someencoding);
  39.         /* Ignore //TRANSLIT or //IGNORE for now. */
  40.         int i;
  41.         for(i = 0; i < strlen(what); i++) {
  42.           if(what[i] == '/') {
  43.                 what[i] = '\0';
  44.                 break;
  45.           }
  46.         }
  47.  
  48.         if (!strcasecmp(what,"CP866")) return CP866;
  49.         if (!strcasecmp(what,"CP1251")) return CP1251;
  50.         if (!strcasecmp(what,"windows-1251")) return CP1251;
  51.         if (!strcasecmp(what,"windows-1252")) return CP1252;
  52.         if (!strcasecmp(what,"CP1252")) return CP1252;
  53.         if (!strcasecmp(what,"KOI8-RU")) return KOI8_RU;
  54.         if (!strcasecmp(what,"ISO8859-5")) return ISO8859_5;
  55.         if (!strcasecmp(what,"UTF-8")) return UTF_8;
  56.         return -1;
  57. }
  58.  
  59. iconv_t iconv_open(const char *tocode, const char *fromcode) {
  60.         int to, from;
  61.  
  62.         if ((to=encoding(tocode))==-1) return -1;
  63.         if ((from=encoding(fromcode))==-1) return -1;
  64.  
  65.         to=to<<16&0xFFFF0000;
  66.         from=from&0xFFFF;
  67.  
  68.         return to+from;
  69. }
  70.  
  71. int iconv_close(iconv_t icd)
  72. {
  73.   return 0;
  74. }
  75.  
  76. size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
  77.         char **outbuf, size_t *outbytesleft)
  78. {
  79.         int n, to, from;
  80.         size_t count1,count2;
  81.         unsigned int pwc;
  82.         int converted,written;
  83.         int (*mbtowc)(conv_t, ucs4_t *, const unsigned char *, int);
  84.         int (*wctomb)(conv_t, unsigned char *, ucs4_t, int);
  85.  
  86.         to=cd>>16;
  87.         from=cd&0xFFFF;
  88.  
  89.         switch (from)
  90.         {
  91.                 case CP866: mbtowc=cp866_mbtowc; break;
  92.                 case CP1251: mbtowc=cp1251_mbtowc; break;
  93.                 case CP1252: mbtowc=cp1252_mbtowc; break;
  94.                 case ISO8859_5: mbtowc=iso8859_5_mbtowc; break;
  95.                 case KOI8_RU: mbtowc=koi8_ru_mbtowc; break;
  96.                 case UTF_8: mbtowc=utf8_mbtowc; break;
  97.                 default: return (size_t)-1;
  98.         }
  99.  
  100.         switch (to)
  101.         {
  102.                 case CP866: wctomb=cp866_wctomb; break;
  103.                 case CP1251: wctomb=cp1251_wctomb; break;
  104.                 case CP1252: wctomb=cp1252_wctomb; break;
  105.                 case ISO8859_5: wctomb=iso8859_5_wctomb; break;
  106.                 case KOI8_RU: wctomb=koi8_ru_wctomb; break;
  107.                 case UTF_8: wctomb=utf8_wctomb; break;
  108.                 default: return (size_t)-1;
  109.         }
  110.  
  111.         count1=0;
  112.         count2=0;
  113.  
  114.     /* Convert input multibyte char to wide character by using calls to mbtowc */
  115.     /* Convert wide character to multibyte by calls to wctomb */
  116.     /* Handle errors as we go on converting to be as standard compliant as possible */
  117.     while(count1 < *inbytesleft) {
  118.       unsigned char mbholder[] = { 0,0,0,0,0,0 };
  119.  
  120.       int numbytes = (mbtowc)(0, &pwc,((*inbuf)+count1), *inbytesleft - count1);
  121.       if(numbytes < 0) {
  122.         /* errno = EILSEQ if invalid multibyte sequence encountered in input */
  123.         /* errno = EINVAL if input ends in the middle of a multibyte sequence */
  124.  
  125.         switch(numbytes) {
  126.           case RET_TOOFEW(0):
  127.             errno = EINVAL;
  128.             break;
  129.  
  130.           case RET_ILSEQ:
  131.             errno = EILSEQ;
  132.             break;
  133.         }
  134.  
  135.         *inbytesleft -= count1;
  136.         *outbytesleft -= count2;
  137.         *inbuf += count1;
  138.         *outbuf += count2;
  139.         return (size_t) -1;
  140.       }
  141.  
  142.       /* Convert from wide to multibyte storing result in mbholder and num converted in numbytes2 */
  143.       /* Pass the minimum amount of space we have, one from mbholder and one from remaining in outbuf */
  144.       int minspace = sizeof(mbholder) <= (*outbytesleft - count2) ? sizeof(mbholder) : (*outbytesleft - count2);
  145.  
  146.       int numbytes2 = (wctomb)(0, &mbholder[0], pwc, minspace);
  147.       if(numbytes2 < 0) {
  148.         switch(numbytes2) {
  149.           case RET_ILUNI:
  150.             errno = EILSEQ;
  151.             break;
  152.           case RET_TOOSMALL:
  153.             errno = E2BIG;
  154.             break;
  155.         }
  156.  
  157.         *inbytesleft -= count1;
  158.         *outbytesleft -= count2;
  159.         *inbuf += count1;
  160.         *outbuf += count2;
  161.  
  162.         return (size_t) -1;
  163.       }
  164.  
  165.       int i;
  166.       for(i = 0; i < numbytes2; i++) {
  167.         *(*outbuf + count2 + i) = mbholder[i];
  168.       }
  169.  
  170.       count1+=numbytes;
  171.       count2+=numbytes2;
  172.     }
  173.  
  174.     /* Successfully converted everything, update the variables and return number of bytes converted */
  175.     *inbytesleft -= count1;
  176.     *outbytesleft -= count2;
  177.     *inbuf += count1;
  178.     *outbuf += count2;
  179.  
  180.     return count1;
  181. }
  182.  
  183. /* int main() */
  184. /* { */
  185. /*      char *s;// ="вертолет"; */
  186. /*      char *z; */
  187. /*      //unsigned int pwc; */
  188. /*      iconv_t cd; */
  189. /*      size_t in, out; */
  190.  
  191. /*     FILE *infile; */
  192. /*     char *fname = "file3.txt"; */
  193.  
  194. /*     size_t testmax = 100; */
  195. /*     size_t test = 0; */
  196.  
  197. /*     infile = fopen(fname,"r"); */
  198.  
  199. /*      fseek(infile, 0, SEEK_END); */
  200. /*      size_t file_size = ftell(infile); */
  201. /*      rewind(infile); */
  202.  
  203. /*      char *buffer = (char*)malloc(file_size * sizeof(char)); */
  204. /*      if (buffer == NULL) */
  205. /*      { */
  206. /*              fclose(infile); */
  207. /*              printf("Error allocating %d bytes.\n", file_size * sizeof(char)); */
  208. /*              return -1; */
  209. /*      } */
  210. /*      size_t bytes_read = fread(buffer, sizeof(char), file_size, infile); */
  211. /*      if (bytes_read != file_size) */
  212. /*      { */
  213. /*              /\* printf("Have read only %d bytes of %d.\n", bytes_read, file_size); *\/ */
  214. /*              free(buffer); */
  215. /*              fclose(infile); */
  216. /*              return -1; */
  217. /*      } */
  218.  
  219. /*      /\* in=strlen(buffer); *\/ */
  220. /*     in = bytes_read; */
  221. /*      z=malloc(in+12000); */
  222.  
  223. /*      out=in-1000; */
  224. /*      cd=iconv_open("UTF-8","UTF-8"); */
  225. /* //   printf("%x\n",cd); */
  226. /*      int t; */
  227. /*     char *zor = z; */
  228.  
  229. /*     /\* for(t = 0; t < 27400; t++) *\/ */
  230. /*     /\*    printf("0x%x,", buffer[t]); *\/ */
  231.  
  232. /*      t=iconv(cd, &buffer, &in, &z, &out); */
  233. /*      /\* printf("\nResult after iconv(): %d", t); *\/ */
  234.  
  235. /*     /\* for(t = 0; t < 24259; t++) *\/ */
  236. /*     /\*   printf("%c", zor[t]); *\/ */
  237.  
  238. /*      //for (;s<s+strlen(s);s++) {cp866_mbtowc (0,  &pwc, s, 1);printf("%c=%u\n",*s,pwc);} */
  239. /* } */
  240.  
  241. /* typedef struct */
  242. /* { */
  243. /*      char *name; */
  244. /*      void *f; */
  245. /* } export_t; */
  246.  
  247. /* char szStart[]           = "START"; */
  248. /* char szVersion[]         = "version"; */
  249. /* char sziconv_open[]    = "iconv_open"; */
  250. /* char sziconv[]   = "iconv"; */
  251.  
  252. /* export_t EXPORTS[] __asm__("EXPORTS") = */
  253. /* { */
  254. /*      { szStart,       (void*)0x0 }, */
  255. /*      { szVersion,     (void*)0x00010001 }, */
  256. /*      { sziconv_open,  iconv_open    }, */
  257. /*      { sziconv,       iconv   }, */
  258. /*      { NULL,          NULL }, */
  259. /* }; */
  260.