/* Source: Subversion repository "Kolibri OS", revision 7084. */

  1. #include <string.h>
  2. #include <stdio.h>
  3. #include <errno.h>
  4.  
  5. typedef int conv_t;
  6. typedef unsigned int ucs4_t;
  7. typedef int iconv_t;
  8.  
  9. /* Return code if invalid input after a shift sequence of n bytes was read.
  10.    (xxx_mbtowc) */
  11. #define RET_SHIFT_ILSEQ(n)  (-1-2*(n))
  12. /* Return code if invalid. (xxx_mbtowc) */
  13. #define RET_ILSEQ           RET_SHIFT_ILSEQ(0)
  14. /* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */
  15. #define RET_TOOFEW(n)       (-2-2*(n))
  16.  
  17. /* Return code if invalid. (xxx_wctomb) */
  18. #define RET_ILUNI      -1
  19. /* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
  20. #define RET_TOOSMALL   -2
  21.  
  22. #define CP866 0
  23. #define CP1251 1
  24. #define CP1252 2
  25. #define KOI8_RU 3
  26. #define ISO8859_5 4
  27. #define UTF_8 5
  28. #define KOI8_R 6
  29. #define ISO8859_1 7
  30.  
  31. #include "cp866.h"
  32. #include "cp1251.h"
  33. #include "cp1252.h"
  34. #include "koi8_r.h"
  35. #include "koi8_ru.h"
  36. #include "iso8859_1.h"
  37. #include "iso8859_5.h"
  38. #include "utf8.h"
  39.  
  40. int encoding(const char *someencoding) {
  41.  
  42.         char *what = strdup(someencoding);
  43.         /* Ignore //TRANSLIT or //IGNORE for now. */
  44.         int i;
  45.         for(i = 0; i < strlen(what); i++) {
  46.           if(what[i] == '/') {
  47.                 what[i] = '\0';
  48.                 break;
  49.           }
  50.         }
  51.  
  52.         if (!strcasecmp(what,"CP866")) return CP866;
  53.         if (!strcasecmp(what,"CP1251")) return CP1251;
  54.         if (!strcasecmp(what,"windows-1251")) return CP1251;
  55.         if (!strcasecmp(what,"CP1252")) return CP1252;
  56.         if (!strcasecmp(what,"windows-1252")) return CP1252;   
  57.         if (!strcasecmp(what,"KOI8-R")) return KOI8_R; 
  58.         if (!strcasecmp(what,"KOI8-RU")) return KOI8_RU;
  59.         if (!strcasecmp(what,"ISO8859-1")) return ISO8859_1;   
  60.         if (!strcasecmp(what,"ISO8859-5")) return ISO8859_5;
  61.         if (!strcasecmp(what,"UTF-8")) return UTF_8;
  62.         return -1;
  63. }
  64.  
  65. iconv_t iconv_open(const char *tocode, const char *fromcode) {
  66.         int to, from;
  67.  
  68.         if ((to=encoding(tocode))==-1) return -1;
  69.         if ((from=encoding(fromcode))==-1) return -1;
  70.  
  71.         to=to<<16&0xFFFF0000;
  72.         from=from&0xFFFF;
  73.  
  74.         return to+from;
  75. }
  76.  
  77. int iconv_close(iconv_t icd)
  78. {
  79.   return 0;
  80. }
  81.  
  82. size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
  83.         char **outbuf, size_t *outbytesleft)
  84. {
  85.         int n, to, from;
  86.         size_t count1,count2;
  87.         unsigned int pwc;
  88.         int converted,written;
  89.         int (*mbtowc)(conv_t, ucs4_t *, const unsigned char *, int);
  90.         int (*wctomb)(conv_t, unsigned char *, ucs4_t, int);
  91.  
  92.         to=cd>>16;
  93.         from=cd&0xFFFF;
  94.  
  95.         switch (from)
  96.         {
  97.                 case CP866: mbtowc=cp866_mbtowc; break;
  98.                 case CP1251: mbtowc=cp1251_mbtowc; break;
  99.                 case CP1252: mbtowc=cp1252_mbtowc; break;
  100.                 case ISO8859_1: mbtowc=iso8859_1_mbtowc; break;        
  101.                 case ISO8859_5: mbtowc=iso8859_5_mbtowc; break;
  102.                 case KOI8_R: mbtowc=koi8_r_mbtowc; break;              
  103.                 case KOI8_RU: mbtowc=koi8_ru_mbtowc; break;
  104.                 case UTF_8: mbtowc=utf8_mbtowc; break;
  105.                 default: return (size_t)-1;
  106.         }
  107.  
  108.         switch (to)
  109.         {
  110.                 case CP866: wctomb=cp866_wctomb; break;
  111.                 case CP1251: wctomb=cp1251_wctomb; break;
  112.                 case CP1252: wctomb=cp1252_wctomb; break;
  113.                 case ISO8859_1: wctomb=iso8859_1_wctomb; break;        
  114.                 case ISO8859_5: wctomb=iso8859_5_wctomb; break;
  115.                 case KOI8_R: wctomb=koi8_r_wctomb; break;              
  116.                 case KOI8_RU: wctomb=koi8_ru_wctomb; break;
  117.                 case UTF_8: wctomb=utf8_wctomb; break;
  118.                 default: return (size_t)-1;
  119.         }
  120.  
  121.         count1=0;
  122.         count2=0;
  123.  
  124.     /* Convert input multibyte char to wide character by using calls to mbtowc */
  125.     /* Convert wide character to multibyte by calls to wctomb */
  126.     /* Handle errors as we go on converting to be as standard compliant as possible */
  127.     while(count1 < *inbytesleft) {
  128.       unsigned char mbholder[] = { 0,0,0,0,0,0 };
  129.  
  130.       int numbytes = (mbtowc)(0, &pwc,((*inbuf)+count1), *inbytesleft - count1);
  131.       if(numbytes < 0) {
  132.         /* errno = EILSEQ if invalid multibyte sequence encountered in input */
  133.         /* errno = EINVAL if input ends in the middle of a multibyte sequence */
  134.  
  135.         switch(numbytes) {
  136.           case RET_TOOFEW(0):
  137.             errno = EINVAL;
  138.             break;
  139.  
  140.           case RET_ILSEQ:
  141.             errno = EILSEQ;
  142.             break;
  143.         }
  144.  
  145.         *inbytesleft -= count1;
  146.         *outbytesleft -= count2;
  147.         *inbuf += count1;
  148.         *outbuf += count2;
  149.         return (size_t) -1;
  150.       }
  151.  
  152.       /* Convert from wide to multibyte storing result in mbholder and num converted in numbytes2 */
  153.       /* Pass the minimum amount of space we have, one from mbholder and one from remaining in outbuf */
  154.       int minspace = sizeof(mbholder) <= (*outbytesleft - count2) ? sizeof(mbholder) : (*outbytesleft - count2);
  155.  
  156.       int numbytes2 = (wctomb)(0, &mbholder[0], pwc, minspace);
  157.       if(numbytes2 < 0) {
  158.         switch(numbytes2) {
  159.           case RET_ILUNI:
  160.             errno = EILSEQ;
  161.             break;
  162.           case RET_TOOSMALL:
  163.             errno = E2BIG;
  164.             break;
  165.         }
  166.  
  167.         *inbytesleft -= count1;
  168.         *outbytesleft -= count2;
  169.         *inbuf += count1;
  170.         *outbuf += count2;
  171.  
  172.         return (size_t) -1;
  173.       }
  174.  
  175.       int i;
  176.       for(i = 0; i < numbytes2; i++) {
  177.         *(*outbuf + count2 + i) = mbholder[i];
  178.       }
  179.  
  180.       count1+=numbytes;
  181.       count2+=numbytes2;
  182.     }
  183.  
  184.     /* Successfully converted everything, update the variables and return number of bytes converted */
  185.     *inbytesleft -= count1;
  186.     *outbytesleft -= count2;
  187.     *inbuf += count1;
  188.     *outbuf += count2;
  189.  
  190.     return count1;
  191. }
  192.  
  193. /* int main() */
  194. /* { */
  195. /*      char *s;// ="вертолет"; */
  196. /*      char *z; */
  197. /*      //unsigned int pwc; */
  198. /*      iconv_t cd; */
  199. /*      size_t in, out; */
  200.  
  201. /*     FILE *infile; */
  202. /*     char *fname = "file3.txt"; */
  203.  
  204. /*     size_t testmax = 100; */
  205. /*     size_t test = 0; */
  206.  
  207. /*     infile = fopen(fname,"r"); */
  208.  
  209. /*      fseek(infile, 0, SEEK_END); */
  210. /*      size_t file_size = ftell(infile); */
  211. /*      rewind(infile); */
  212.  
  213. /*      char *buffer = (char*)malloc(file_size * sizeof(char)); */
  214. /*      if (buffer == NULL) */
  215. /*      { */
  216. /*              fclose(infile); */
  217. /*              printf("Error allocating %d bytes.\n", file_size * sizeof(char)); */
  218. /*              return -1; */
  219. /*      } */
  220. /*      size_t bytes_read = fread(buffer, sizeof(char), file_size, infile); */
  221. /*      if (bytes_read != file_size) */
  222. /*      { */
  223. /*              /\* printf("Have read only %d bytes of %d.\n", bytes_read, file_size); *\/ */
  224. /*              free(buffer); */
  225. /*              fclose(infile); */
  226. /*              return -1; */
  227. /*      } */
  228.  
  229. /*      /\* in=strlen(buffer); *\/ */
  230. /*     in = bytes_read; */
  231. /*      z=malloc(in+12000); */
  232.  
  233. /*      out=in-1000; */
  234. /*      cd=iconv_open("UTF-8","UTF-8"); */
  235. /* //   printf("%x\n",cd); */
  236. /*      int t; */
  237. /*     char *zor = z; */
  238.  
  239. /*     /\* for(t = 0; t < 27400; t++) *\/ */
  240. /*     /\*    printf("0x%x,", buffer[t]); *\/ */
  241.  
  242. /*      t=iconv(cd, &buffer, &in, &z, &out); */
  243. /*      /\* printf("\nResult after iconv(): %d", t); *\/ */
  244.  
  245. /*     /\* for(t = 0; t < 24259; t++) *\/ */
  246. /*     /\*   printf("%c", zor[t]); *\/ */
  247.  
  248. /*      //for (;s<s+strlen(s);s++) {cp866_mbtowc (0,  &pwc, s, 1);printf("%c=%u\n",*s,pwc);} */
  249. /* } */
  250.  
  251. /* typedef struct */
  252. /* { */
  253. /*      char *name; */
  254. /*      void *f; */
  255. /* } export_t; */
  256.  
  257. /* char szStart[]           = "START"; */
  258. /* char szVersion[]         = "version"; */
  259. /* char sziconv_open[]    = "iconv_open"; */
  260. /* char sziconv[]   = "iconv"; */
  261.  
  262. /* export_t EXPORTS[] __asm__("EXPORTS") = */
  263. /* { */
  264. /*      { szStart,       (void*)0x0 }, */
  265. /*      { szVersion,     (void*)0x00010001 }, */
  266. /*      { sziconv_open,  iconv_open    }, */
  267. /*      { sziconv,       iconv   }, */
  268. /*      { NULL,          NULL }, */
  269. /* }; */
  270.