Subversion Repositories Kolibri OS

Rev

Rev 7079 | Rev 7085 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. #include <string.h>
  2. #include <stdio.h>
  3. #include <errno.h>
  4.  
  5. typedef int conv_t;
  6. typedef unsigned int ucs4_t;
  7. typedef int iconv_t;
  8.  
  9. /* Return code if invalid input after a shift sequence of n bytes was read.
  10.    (xxx_mbtowc) */
  11. #define RET_SHIFT_ILSEQ(n)  (-1-2*(n))
  12. /* Return code if invalid. (xxx_mbtowc) */
  13. #define RET_ILSEQ           RET_SHIFT_ILSEQ(0)
  14. /* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */
  15. #define RET_TOOFEW(n)       (-2-2*(n))
  16.  
  17. /* Return code if invalid. (xxx_wctomb) */
  18. #define RET_ILUNI      -1
  19. /* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
  20. #define RET_TOOSMALL   -2
  21.  
  22. #define CP866 0
  23. #define CP1251 1
  24. #define CP1252 2
  25. #define KOI8_RU 3
  26. #define ISO8859_5 4
  27. #define UTF_8 5
  28. #define KOI8_R 6
  29.  
  30. #include "cp866.h"
  31. #include "cp1251.h"
  32. #include "cp1252.h"
  33. #include "koi8_r.h"
  34. #include "koi8_ru.h"
  35. #include "iso8859_5.h"
  36. #include "utf8.h"
  37.  
  38. int encoding(const char *someencoding) {
  39.  
  40.         char *what = strdup(someencoding);
  41.         /* Ignore //TRANSLIT or //IGNORE for now. */
  42.         int i;
  43.         for(i = 0; i < strlen(what); i++) {
  44.           if(what[i] == '/') {
  45.                 what[i] = '\0';
  46.                 break;
  47.           }
  48.         }
  49.  
  50.         if (!strcasecmp(what,"CP866")) return CP866;
  51.         if (!strcasecmp(what,"CP1251")) return CP1251;
  52.         if (!strcasecmp(what,"windows-1251")) return CP1251;
  53.         if (!strcasecmp(what,"CP1252")) return CP1252;
  54.         if (!strcasecmp(what,"windows-1252")) return CP1252;   
  55.         if (!strcasecmp(what,"KOI8-R")) return KOI8_R; 
  56.         if (!strcasecmp(what,"KOI8-RU")) return KOI8_RU;
  57.         if (!strcasecmp(what,"ISO8859-5")) return ISO8859_5;
  58.         if (!strcasecmp(what,"UTF-8")) return UTF_8;
  59.         return -1;
  60. }
  61.  
  62. iconv_t iconv_open(const char *tocode, const char *fromcode) {
  63.         int to, from;
  64.  
  65.         if ((to=encoding(tocode))==-1) return -1;
  66.         if ((from=encoding(fromcode))==-1) return -1;
  67.  
  68.         to=to<<16&0xFFFF0000;
  69.         from=from&0xFFFF;
  70.  
  71.         return to+from;
  72. }
  73.  
  74. int iconv_close(iconv_t icd)
  75. {
  76.   return 0;
  77. }
  78.  
  79. size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
  80.         char **outbuf, size_t *outbytesleft)
  81. {
  82.         int n, to, from;
  83.         size_t count1,count2;
  84.         unsigned int pwc;
  85.         int converted,written;
  86.         int (*mbtowc)(conv_t, ucs4_t *, const unsigned char *, int);
  87.         int (*wctomb)(conv_t, unsigned char *, ucs4_t, int);
  88.  
  89.         to=cd>>16;
  90.         from=cd&0xFFFF;
  91.  
  92.         switch (from)
  93.         {
  94.                 case CP866: mbtowc=cp866_mbtowc; break;
  95.                 case CP1251: mbtowc=cp1251_mbtowc; break;
  96.                 case CP1252: mbtowc=cp1252_mbtowc; break;
  97.                 case ISO8859_5: mbtowc=iso8859_5_mbtowc; break;
  98.                 case KOI8_R: mbtowc=koi8_r_mbtowc; break;              
  99.                 case KOI8_RU: mbtowc=koi8_ru_mbtowc; break;
  100.                 case UTF_8: mbtowc=utf8_mbtowc; break;
  101.                 default: return (size_t)-1;
  102.         }
  103.  
  104.         switch (to)
  105.         {
  106.                 case CP866: wctomb=cp866_wctomb; break;
  107.                 case CP1251: wctomb=cp1251_wctomb; break;
  108.                 case CP1252: wctomb=cp1252_wctomb; break;
  109.                 case ISO8859_5: wctomb=iso8859_5_wctomb; break;
  110.                 case KOI8_R: wctomb=koi8_r_wctomb; break;              
  111.                 case KOI8_RU: wctomb=koi8_ru_wctomb; break;
  112.                 case UTF_8: wctomb=utf8_wctomb; break;
  113.                 default: return (size_t)-1;
  114.         }
  115.  
  116.         count1=0;
  117.         count2=0;
  118.  
  119.     /* Convert input multibyte char to wide character by using calls to mbtowc */
  120.     /* Convert wide character to multibyte by calls to wctomb */
  121.     /* Handle errors as we go on converting to be as standard compliant as possible */
  122.     while(count1 < *inbytesleft) {
  123.       unsigned char mbholder[] = { 0,0,0,0,0,0 };
  124.  
  125.       int numbytes = (mbtowc)(0, &pwc,((*inbuf)+count1), *inbytesleft - count1);
  126.       if(numbytes < 0) {
  127.         /* errno = EILSEQ if invalid multibyte sequence encountered in input */
  128.         /* errno = EINVAL if input ends in the middle of a multibyte sequence */
  129.  
  130.         switch(numbytes) {
  131.           case RET_TOOFEW(0):
  132.             errno = EINVAL;
  133.             break;
  134.  
  135.           case RET_ILSEQ:
  136.             errno = EILSEQ;
  137.             break;
  138.         }
  139.  
  140.         *inbytesleft -= count1;
  141.         *outbytesleft -= count2;
  142.         *inbuf += count1;
  143.         *outbuf += count2;
  144.         return (size_t) -1;
  145.       }
  146.  
  147.       /* Convert from wide to multibyte storing result in mbholder and num converted in numbytes2 */
  148.       /* Pass the minimum amount of space we have, one from mbholder and one from remaining in outbuf */
  149.       int minspace = sizeof(mbholder) <= (*outbytesleft - count2) ? sizeof(mbholder) : (*outbytesleft - count2);
  150.  
  151.       int numbytes2 = (wctomb)(0, &mbholder[0], pwc, minspace);
  152.       if(numbytes2 < 0) {
  153.         switch(numbytes2) {
  154.           case RET_ILUNI:
  155.             errno = EILSEQ;
  156.             break;
  157.           case RET_TOOSMALL:
  158.             errno = E2BIG;
  159.             break;
  160.         }
  161.  
  162.         *inbytesleft -= count1;
  163.         *outbytesleft -= count2;
  164.         *inbuf += count1;
  165.         *outbuf += count2;
  166.  
  167.         return (size_t) -1;
  168.       }
  169.  
  170.       int i;
  171.       for(i = 0; i < numbytes2; i++) {
  172.         *(*outbuf + count2 + i) = mbholder[i];
  173.       }
  174.  
  175.       count1+=numbytes;
  176.       count2+=numbytes2;
  177.     }
  178.  
  179.     /* Successfully converted everything, update the variables and return number of bytes converted */
  180.     *inbytesleft -= count1;
  181.     *outbytesleft -= count2;
  182.     *inbuf += count1;
  183.     *outbuf += count2;
  184.  
  185.     return count1;
  186. }
  187.  
  188. /* int main() */
  189. /* { */
  190. /*      char *s;// ="вертолет"; */
  191. /*      char *z; */
  192. /*      //unsigned int pwc; */
  193. /*      iconv_t cd; */
  194. /*      size_t in, out; */
  195.  
  196. /*     FILE *infile; */
  197. /*     char *fname = "file3.txt"; */
  198.  
  199. /*     size_t testmax = 100; */
  200. /*     size_t test = 0; */
  201.  
  202. /*     infile = fopen(fname,"r"); */
  203.  
  204. /*      fseek(infile, 0, SEEK_END); */
  205. /*      size_t file_size = ftell(infile); */
  206. /*      rewind(infile); */
  207.  
  208. /*      char *buffer = (char*)malloc(file_size * sizeof(char)); */
  209. /*      if (buffer == NULL) */
  210. /*      { */
  211. /*              fclose(infile); */
  212. /*              printf("Error allocating %d bytes.\n", file_size * sizeof(char)); */
  213. /*              return -1; */
  214. /*      } */
  215. /*      size_t bytes_read = fread(buffer, sizeof(char), file_size, infile); */
  216. /*      if (bytes_read != file_size) */
  217. /*      { */
  218. /*              /\* printf("Have read only %d bytes of %d.\n", bytes_read, file_size); *\/ */
  219. /*              free(buffer); */
  220. /*              fclose(infile); */
  221. /*              return -1; */
  222. /*      } */
  223.  
  224. /*      /\* in=strlen(buffer); *\/ */
  225. /*     in = bytes_read; */
  226. /*      z=malloc(in+12000); */
  227.  
  228. /*      out=in-1000; */
  229. /*      cd=iconv_open("UTF-8","UTF-8"); */
  230. /* //   printf("%x\n",cd); */
  231. /*      int t; */
  232. /*     char *zor = z; */
  233.  
  234. /*     /\* for(t = 0; t < 27400; t++) *\/ */
  235. /*     /\*    printf("0x%x,", buffer[t]); *\/ */
  236.  
  237. /*      t=iconv(cd, &buffer, &in, &z, &out); */
  238. /*      /\* printf("\nResult after iconv(): %d", t); *\/ */
  239.  
  240. /*     /\* for(t = 0; t < 24259; t++) *\/ */
  241. /*     /\*   printf("%c", zor[t]); *\/ */
  242.  
  243. /*      //for (;s<s+strlen(s);s++) {cp866_mbtowc (0,  &pwc, s, 1);printf("%c=%u\n",*s,pwc);} */
  244. /* } */
  245.  
  246. /* typedef struct */
  247. /* { */
  248. /*      char *name; */
  249. /*      void *f; */
  250. /* } export_t; */
  251.  
  252. /* char szStart[]           = "START"; */
  253. /* char szVersion[]         = "version"; */
  254. /* char sziconv_open[]    = "iconv_open"; */
  255. /* char sziconv[]   = "iconv"; */
  256.  
  257. /* export_t EXPORTS[] __asm__("EXPORTS") = */
  258. /* { */
  259. /*      { szStart,       (void*)0x0 }, */
  260. /*      { szVersion,     (void*)0x00010001 }, */
  261. /*      { sziconv_open,  iconv_open    }, */
  262. /*      { sziconv,       iconv   }, */
  263. /*      { NULL,          NULL }, */
  264. /* }; */
  265.