Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /* <ctype.h> replacement macros.
  2.  
  3.    Copyright (C) 2000, 2001, 2002, 2003, 2004,
  4.    2005 Free Software Foundation, Inc.
  5.    Contributed by Zack Weinberg <zackw@stanford.edu>.
  6.  
  7. This file is part of the libiberty library.
  8. Libiberty is free software; you can redistribute it and/or
  9. modify it under the terms of the GNU Library General Public
  10. License as published by the Free Software Foundation; either
  11. version 2 of the License, or (at your option) any later version.
  12.  
  13. Libiberty is distributed in the hope that it will be useful,
  14. but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16. Library General Public License for more details.
  17.  
  18. You should have received a copy of the GNU Library General Public
  19. License along with libiberty; see the file COPYING.LIB.  If
  20. not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
  21. Boston, MA 02110-1301, USA.  */
  22.  
  23. /*
  24.  
  25. @defvr Extension HOST_CHARSET
  26. This macro indicates the basic character set and encoding used by the
  27. host: more precisely, the encoding used for character constants in
  28. preprocessor @samp{#if} statements (the C "execution character set").
  29. It is defined by @file{safe-ctype.h}, and will be an integer constant
  30. with one of the following values:
  31.  
  32. @ftable @code
  33. @item HOST_CHARSET_UNKNOWN
  34. The host character set is unknown - that is, not one of the next two
  35. possibilities.
  36.  
  37. @item HOST_CHARSET_ASCII
  38. The host character set is ASCII.
  39.  
  40. @item HOST_CHARSET_EBCDIC
  41. The host character set is some variant of EBCDIC.  (Only one of the
  42. nineteen EBCDIC varying characters is tested; exercise caution.)
  43. @end ftable
  44. @end defvr
  45.  
  46. @deffn  Extension ISALPHA  (@var{c})
  47. @deffnx Extension ISALNUM  (@var{c})
  48. @deffnx Extension ISBLANK  (@var{c})
  49. @deffnx Extension ISCNTRL  (@var{c})
  50. @deffnx Extension ISDIGIT  (@var{c})
  51. @deffnx Extension ISGRAPH  (@var{c})
  52. @deffnx Extension ISLOWER  (@var{c})
  53. @deffnx Extension ISPRINT  (@var{c})
  54. @deffnx Extension ISPUNCT  (@var{c})
  55. @deffnx Extension ISSPACE  (@var{c})
  56. @deffnx Extension ISUPPER  (@var{c})
  57. @deffnx Extension ISXDIGIT (@var{c})
  58.  
  59. These twelve macros are defined by @file{safe-ctype.h}.  Each has the
  60. same meaning as the corresponding macro (with name in lowercase)
  61. defined by the standard header @file{ctype.h}.  For example,
  62. @code{ISALPHA} returns true for alphabetic characters and false for
  63. others.  However, there are two differences between these macros and
  64. those provided by @file{ctype.h}:
  65.  
  66. @itemize @bullet
  67. @item These macros are guaranteed to have well-defined behavior for all
  68. values representable by @code{signed char} and @code{unsigned char}, and
  69. for @code{EOF}.
  70.  
  71. @item These macros ignore the current locale; they are true for these
  72. fixed sets of characters:
  73. @multitable {@code{XDIGIT}} {yada yada yada yada yada yada yada yada}
  74. @item @code{ALPHA}  @tab @kbd{A-Za-z}
  75. @item @code{ALNUM}  @tab @kbd{A-Za-z0-9}
  76. @item @code{BLANK}  @tab @kbd{space tab}
  77. @item @code{CNTRL}  @tab @code{!PRINT}
  78. @item @code{DIGIT}  @tab @kbd{0-9}
  79. @item @code{GRAPH}  @tab @code{ALNUM || PUNCT}
  80. @item @code{LOWER}  @tab @kbd{a-z}
  81. @item @code{PRINT}  @tab @code{GRAPH ||} @kbd{space}
  82. @item @code{PUNCT}  @tab @kbd{`~!@@#$%^&*()_-=+[@{]@}\|;:'",<.>/?}
  83. @item @code{SPACE}  @tab @kbd{space tab \n \r \f \v}
  84. @item @code{UPPER}  @tab @kbd{A-Z}
  85. @item @code{XDIGIT} @tab @kbd{0-9A-Fa-f}
  86. @end multitable
  87.  
  88. Note that, if the host character set is ASCII or a superset thereof,
  89. all these macros will return false for all values of @code{char} outside
  90. the range of 7-bit ASCII.  In particular, both ISPRINT and ISCNTRL return
  91. false for characters with numeric values from 128 to 255.
  92. @end itemize
  93. @end deffn
  94.  
  95. @deffn  Extension ISIDNUM         (@var{c})
  96. @deffnx Extension ISIDST          (@var{c})
  97. @deffnx Extension IS_VSPACE       (@var{c})
  98. @deffnx Extension IS_NVSPACE      (@var{c})
  99. @deffnx Extension IS_SPACE_OR_NUL (@var{c})
  100. @deffnx Extension IS_ISOBASIC     (@var{c})
  101. These six macros are defined by @file{safe-ctype.h} and provide
  102. additional character classes which are useful when doing lexical
  103. analysis of C or similar languages.  They are true for the following
  104. sets of characters:
  105.  
  106. @multitable {@code{SPACE_OR_NUL}} {yada yada yada yada yada yada yada yada}
  107. @item @code{IDNUM}        @tab @kbd{A-Za-z0-9_}
  108. @item @code{IDST}         @tab @kbd{A-Za-z_}
  109. @item @code{VSPACE}       @tab @kbd{\r \n}
  110. @item @code{NVSPACE}      @tab @kbd{space tab \f \v \0}
  111. @item @code{SPACE_OR_NUL} @tab @code{VSPACE || NVSPACE}
  112. @item @code{ISOBASIC}     @tab @code{VSPACE || NVSPACE || PRINT}
  113. @end multitable
  114. @end deffn
  115.  
  116. */
  117.  
  118. #include "ansidecl.h"
  119. #include <safe-ctype.h>
  120. #include <stdio.h>  /* for EOF */
  121.  
  122. #if EOF != -1 /* build-time guard: refuse to compile on hosts where EOF is not -1 */
  123.  #error "<safe-ctype.h> requires EOF == -1"
  124. #endif /* EOF != -1 */
  125.  
  126. /* Shorthand */
  127. #define bl _sch_isblank
  128. #define cn _sch_iscntrl
  129. #define di _sch_isdigit
  130. #define is _sch_isidst
  131. #define lo _sch_islower
  132. #define nv _sch_isnvsp
  133. #define pn _sch_ispunct
  134. #define pr _sch_isprint
  135. #define sp _sch_isspace
  136. #define up _sch_isupper
  137. #define vs _sch_isvsp
  138. #define xd _sch_isxdigit
  139.  
  140. /* Masks.  */
  141. #define L  (const unsigned short) (lo|is   |pr) /* lower case letter */
  142. #define XL (const unsigned short) (lo|is|xd|pr) /* lowercase hex digit */
  143. #define U  (const unsigned short) (up|is   |pr) /* upper case letter */
  144. #define XU (const unsigned short) (up|is|xd|pr) /* uppercase hex digit */
  145. #define D  (const unsigned short) (di   |xd|pr) /* decimal digit */
  146. #define P  (const unsigned short) (pn      |pr) /* punctuation */
  147. #define _  (const unsigned short) (pn|is   |pr) /* underscore */
  148.  
  149. #define C  (const unsigned short) (         cn) /* control character */
  150. #define Z  (const unsigned short) (nv      |cn) /* NUL */
  151. #define M  (const unsigned short) (nv|sp   |cn) /* cursor movement: \f \v */
  152. #define V  (const unsigned short) (vs|sp   |cn) /* vertical space: \r \n */
  153. #define T  (const unsigned short) (nv|sp|bl|cn) /* tab */
  154. #define S  (const unsigned short) (nv|sp|bl|pr) /* space */
  155.  
  156. /* Are we ASCII? */
  157. #if HOST_CHARSET == HOST_CHARSET_ASCII
  158.  
  159. const unsigned short _sch_istable[256] =
  160. {
  161.   Z,  C,  C,  C,   C,  C,  C,  C,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
  162.   C,  T,  V,  M,   M,  V,  C,  C,   /* BS  HT  LF  VT   FF  CR  SO  SI  */
  163.   C,  C,  C,  C,   C,  C,  C,  C,   /* DLE DC1 DC2 DC3  DC4 NAK SYN ETB */
  164.   C,  C,  C,  C,   C,  C,  C,  C,   /* CAN EM  SUB ESC  FS  GS  RS  US  */
  165.   S,  P,  P,  P,   P,  P,  P,  P,   /* SP  !   "   #    $   %   &   '   */
  166.   P,  P,  P,  P,   P,  P,  P,  P,   /* (   )   *   +    ,   -   .   /   */
  167.   D,  D,  D,  D,   D,  D,  D,  D,   /* 0   1   2   3    4   5   6   7   */
  168.   D,  D,  P,  P,   P,  P,  P,  P,   /* 8   9   :   ;    <   =   >   ?   */
  169.   P, XU, XU, XU,  XU, XU, XU,  U,   /* @   A   B   C    D   E   F   G   */
  170.   U,  U,  U,  U,   U,  U,  U,  U,   /* H   I   J   K    L   M   N   O   */
  171.   U,  U,  U,  U,   U,  U,  U,  U,   /* P   Q   R   S    T   U   V   W   */
  172.   U,  U,  U,  P,   P,  P,  P,  _,   /* X   Y   Z   [    \   ]   ^   _   */
  173.   P, XL, XL, XL,  XL, XL, XL,  L,   /* `   a   b   c    d   e   f   g   */
  174.   L,  L,  L,  L,   L,  L,  L,  L,   /* h   i   j   k    l   m   n   o   */
  175.   L,  L,  L,  L,   L,  L,  L,  L,   /* p   q   r   s    t   u   v   w   */
  176.   L,  L,  L,  P,   P,  P,  P,  C,   /* x   y   z   {    |   }   ~   DEL */
  177.  
  178.   /* high half of unsigned char is locale-specific, so all tests are
  179.      false in "C" locale */
  180.   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  181.   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  182.   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  183.   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  184.  
  185.   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  186.   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  187.   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  188.   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  189. };
  190.  
  191. const unsigned char _sch_tolower[256] =
  192. {
  193.    0,  1,  2,  3,   4,  5,  6,  7,   8,  9, 10, 11,  12, 13, 14, 15,
  194.   16, 17, 18, 19,  20, 21, 22, 23,  24, 25, 26, 27,  28, 29, 30, 31,
  195.   32, 33, 34, 35,  36, 37, 38, 39,  40, 41, 42, 43,  44, 45, 46, 47,
  196.   48, 49, 50, 51,  52, 53, 54, 55,  56, 57, 58, 59,  60, 61, 62, 63,
  197.   64,
  198.  
  199.   'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  200.   'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  201.  
  202.   91, 92, 93, 94, 95, 96,
  203.  
  204.   'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  205.   'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  206.  
  207.  123,124,125,126,127,
  208.  
  209.  128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143,
  210.  144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159,
  211.  160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175,
  212.  176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191,
  213.  
  214.  192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207,
  215.  208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223,
  216.  224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239,
  217.  240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255,
  218. };
  219.  
  220. const unsigned char _sch_toupper[256] =
  221. {
  222.    0,  1,  2,  3,   4,  5,  6,  7,   8,  9, 10, 11,  12, 13, 14, 15,
  223.   16, 17, 18, 19,  20, 21, 22, 23,  24, 25, 26, 27,  28, 29, 30, 31,
  224.   32, 33, 34, 35,  36, 37, 38, 39,  40, 41, 42, 43,  44, 45, 46, 47,
  225.   48, 49, 50, 51,  52, 53, 54, 55,  56, 57, 58, 59,  60, 61, 62, 63,
  226.   64,
  227.  
  228.   'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  229.   'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  230.  
  231.   91, 92, 93, 94, 95, 96,
  232.  
  233.   'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  234.   'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  235.  
  236.  123,124,125,126,127,
  237.  
  238.  128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143,
  239.  144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159,
  240.  160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175,
  241.  176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191,
  242.  
  243.  192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207,
  244.  208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223,
  245.  224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239,
  246.  240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255,
  247. };
  248.  
  249. #else /* HOST_CHARSET is not ASCII: no tables exist, so stop the build */
  250. # if HOST_CHARSET == HOST_CHARSET_EBCDIC
  251.   #error "FIXME: write tables for EBCDIC"
  252. # else /* neither ASCII nor EBCDIC */
  253.   #error "Unrecognized host character set"
  254. # endif /* HOST_CHARSET == HOST_CHARSET_EBCDIC */
  255. #endif /* HOST_CHARSET == HOST_CHARSET_ASCII */
  256.