Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
9837 | turbocat | 1 | /** |
2 | * @file |
||
3 | * The following is a full featured parser for configuration files using |
||
4 | * basic format "key = value". |
||
5 | * |
||
6 | * Well, it's big, but it can properly manage spaces, empty lines, |
||
7 | * single and double-quoted strings, hex numbers, comments, semicolons |
||
8 | * and more. It also happens to be much more robust than the original one. |
||
9 | * |
||
10 | * @author zamaz |
||
11 | */ |
||
12 | |||
13 | #include |
||
14 | #include |
||
15 | #include "ckvp.h" |
||
16 | |||
/**
 * Parser states. The current state is kept in the low byte of the parser's
 * internal word and selects a row of the transition table; 0 is deliberately
 * left unused so that it never denotes a valid state.
 */
enum {
	STATE_ERROR = 1,	/**< parse error */
	STATE_BEGIN,		/**< start of input or of a new entry */
	STATE_COMMENT,		/**< inside a comment */

	/* Key part. */
	STATE_KEY,		/**< inside an unquoted key */
	STATE_KEYBS,		/**< key: just after a backslash */
	STATE_KEYBSX1,		/**< key: expecting first hex digit of \\x */
	STATE_KEYBSX2,		/**< key: expecting second hex digit of \\x */
	STATE_KEYSQ,		/**< key: inside single quotes */
	STATE_KEYDQ,		/**< key: inside double quotes */
	STATE_KEYDQBS,		/**< key: backslash inside double quotes */
	STATE_KEYDQBSX1,	/**< key: first hex digit of \\x, double quoted */
	STATE_KEYDQBSX2,	/**< key: second hex digit of \\x, double quoted */

	/* Separator. */
	STATE_BEQ,		/**< key finished, '=' not seen yet */
	STATE_AEQ,		/**< '=' seen, value not started yet */

	/* Value part, mirroring the key states. */
	STATE_VALUE,		/**< inside an unquoted value */
	STATE_VALBS,		/**< value: just after a backslash */
	STATE_VALBSX1,		/**< value: expecting first hex digit of \\x */
	STATE_VALBSX2,		/**< value: expecting second hex digit of \\x */
	STATE_VALSQ,		/**< value: inside single quotes */
	STATE_VALDQ,		/**< value: inside double quotes */
	STATE_VALDQBS,		/**< value: backslash inside double quotes */
	STATE_VALDQBSX1,	/**< value: first hex digit of \\x, double quoted */
	STATE_VALDQBSX2,	/**< value: second hex digit of \\x, double quoted */
	STATE_VALEND		/**< value finished, a new entry may follow */
};

/**
 * Action flags OR'ed with the next state in transition table entries.
 * Bits 0x0f00 report an event to the caller; bits 0xf000 tell the parser
 * how to store the current character.
 */
enum {
	ACTION_KEY = 0x0100,		/**< a key is complete */
	ACTION_VALUE = 0x0200,		/**< a value is complete */
	ACTION_ERROR = 0x0400,		/**< an error was detected */
	ACTION_STORE = 0x1000,		/**< store the character unchanged */
	ACTION_STORE_MOD = 0x2000,	/**< store the translated escape character */
	ACTION_STORE_HEX1 = 0x4000,	/**< remember the first hex digit */
	ACTION_STORE_HEX2 = 0x8000	/**< combine with second hex digit and store */
};
||
50 | |||
/**
 * Expands to a list of designated array initializers that assign (st) to
 * the index of every ASCII hexadecimal digit: '0'-'9', 'a'-'f' and 'A'-'F'.
 * Meant to be used inside an array initializer indexed by character code.
 */
#define HEX_INDICES(st)							\
	['0'] = (st), ['1'] = (st), ['2'] = (st), ['3'] = (st),	\
	['4'] = (st), ['5'] = (st), ['6'] = (st), ['7'] = (st),	\
	['8'] = (st), ['9'] = (st),					\
	['a'] = (st), ['b'] = (st), ['c'] = (st),			\
	['d'] = (st), ['e'] = (st), ['f'] = (st),			\
	['A'] = (st), ['B'] = (st), ['C'] = (st),			\
	['D'] = (st), ['E'] = (st), ['F'] = (st)
||
58 | |||
59 | /** |
||
60 | * ckvp_parse() takes the current state (ckvp), a buffer in[size] and returns |
||
61 | * the number of characters processed. |
||
62 | * |
||
63 | * Each time ckvp_parse() returns, ckvp->state must be checked. If no error |
||
64 | * occured, ckvp_parse() must be called again with the remaining characters |
||
65 | * if any, otherwise the next input buffer. |
||
66 | * |
||
67 | * At the end of input, ckvp_parse() must be called with a zero size. |
||
68 | * |
||
69 | * This function doesn't allocate anything. |
||
70 | * |
||
71 | * @param[in,out] ckvp Current state. |
||
72 | * @param size Number of characters in buffer "in". |
||
73 | * @param in Input buffer to parse. |
||
74 | * @return Number of characters processed. |
||
75 | */ |
||
76 | size_t ckvp_parse(ckvp_t *ckvp, size_t size, const char in[]) |
||
77 | { |
||
78 | /** |
||
79 | * State machine definition: |
||
80 | * |
||
81 | * st[current_state][current_character] = next state | action |
||
82 | * |
||
83 | * Special indices for current_character are: |
||
84 | * |
||
85 | * - 0x100 for action on characters not in the list |
||
86 | * - 0x101 for action when encountering end of input while in the |
||
87 | * current state (often ACTION_ERROR) |
||
88 | */ |
||
89 | static const unsigned int st[][0x102] = { |
||
90 | [STATE_ERROR] = { |
||
91 | [0x100] = (STATE_ERROR | ACTION_ERROR), |
||
92 | [0x101] = ACTION_ERROR |
||
93 | }, |
||
94 | [STATE_BEGIN] = { |
||
95 | [' '] = STATE_BEGIN, |
||
96 | ['\f'] = STATE_BEGIN, |
||
97 | ['\n'] = STATE_BEGIN, |
||
98 | ['\r'] = STATE_BEGIN, |
||
99 | ['\t'] = STATE_BEGIN, |
||
100 | ['\v'] = STATE_BEGIN, |
||
101 | [';'] = (STATE_ERROR | ACTION_ERROR), |
||
102 | ['#'] = STATE_COMMENT, |
||
103 | ['\''] = STATE_KEYSQ, |
||
104 | ['"'] = STATE_KEYDQ, |
||
105 | ['\\'] = STATE_KEYBS, |
||
106 | ['='] = (STATE_ERROR | ACTION_ERROR), |
||
107 | [0x100] = (STATE_KEY | ACTION_STORE), |
||
108 | [0x101] = 0 |
||
109 | }, |
||
110 | [STATE_COMMENT] = { |
||
111 | ['\n'] = STATE_BEGIN, |
||
112 | [0x100] = STATE_COMMENT, |
||
113 | [0x101] = 0 |
||
114 | }, |
||
115 | [STATE_KEY] = { |
||
116 | [' '] = (STATE_BEQ | ACTION_KEY), |
||
117 | ['\f'] = (STATE_BEQ | ACTION_KEY), |
||
118 | ['\n'] = (STATE_BEQ | ACTION_KEY), |
||
119 | ['\r'] = (STATE_BEQ | ACTION_KEY), |
||
120 | ['\t'] = (STATE_BEQ | ACTION_KEY), |
||
121 | ['\v'] = (STATE_BEQ | ACTION_KEY), |
||
122 | ['\''] = STATE_KEYSQ, |
||
123 | ['\"'] = STATE_KEYDQ, |
||
124 | [';'] = (STATE_ERROR | ACTION_ERROR), |
||
125 | ['='] = (STATE_AEQ | ACTION_KEY), |
||
126 | ['#'] = (STATE_ERROR | ACTION_ERROR), |
||
127 | ['\\'] = STATE_KEYBS, |
||
128 | [0x100] = (STATE_KEY | ACTION_STORE), |
||
129 | [0x101] = ACTION_ERROR |
||
130 | }, |
||
131 | [STATE_KEYBS] = { |
||
132 | ['f'] = (STATE_KEY | ACTION_STORE_MOD), |
||
133 | ['n'] = (STATE_KEY | ACTION_STORE_MOD), |
||
134 | ['r'] = (STATE_KEY | ACTION_STORE_MOD), |
||
135 | ['t'] = (STATE_KEY | ACTION_STORE_MOD), |
||
136 | ['v'] = (STATE_KEY | ACTION_STORE_MOD), |
||
137 | ['x'] = STATE_KEYBSX1, |
||
138 | ['\n'] = STATE_KEY, |
||
139 | [0x100] = (STATE_KEY | ACTION_STORE), |
||
140 | [0x101] = ACTION_ERROR |
||
141 | }, |
||
142 | [STATE_KEYBSX1] = { |
||
143 | HEX_INDICES(STATE_KEYBSX2 | ACTION_STORE_HEX1), |
||
144 | [0x100] = (STATE_ERROR | ACTION_ERROR), |
||
145 | [0x101] = ACTION_ERROR |
||
146 | }, |
||
147 | [STATE_KEYBSX2] = { |
||
148 | HEX_INDICES(STATE_KEY | ACTION_STORE_HEX2), |
||
149 | [0x100] = (STATE_ERROR | ACTION_ERROR), |
||
150 | [0x101] = ACTION_ERROR |
||
151 | }, |
||
152 | [STATE_KEYSQ] = { |
||
153 | ['\''] = STATE_KEY, |
||
154 | [0x100] = (STATE_KEYSQ | ACTION_STORE), |
||
155 | [0x101] = ACTION_ERROR |
||
156 | }, |
||
157 | [STATE_KEYDQ] = { |
||
158 | ['"'] = STATE_KEY, |
||
159 | ['\\'] = STATE_KEYDQBS, |
||
160 | [0x100] = (STATE_KEYDQ | ACTION_STORE), |
||
161 | [0x101] = ACTION_ERROR |
||
162 | }, |
||
163 | [STATE_KEYDQBS] = { |
||
164 | ['f'] = (STATE_KEYDQ | ACTION_STORE_MOD), |
||
165 | ['n'] = (STATE_KEYDQ | ACTION_STORE_MOD), |
||
166 | ['r'] = (STATE_KEYDQ | ACTION_STORE_MOD), |
||
167 | ['t'] = (STATE_KEYDQ | ACTION_STORE_MOD), |
||
168 | ['v'] = (STATE_KEYDQ | ACTION_STORE_MOD), |
||
169 | ['x'] = STATE_KEYDQBSX1, |
||
170 | ['\n'] = STATE_KEYDQ, |
||
171 | [0x100] = (STATE_KEYDQ | ACTION_STORE), |
||
172 | [0x101] = ACTION_ERROR |
||
173 | }, |
||
174 | [STATE_KEYDQBSX1] = { |
||
175 | HEX_INDICES(STATE_KEYDQBSX2 | ACTION_STORE_HEX1), |
||
176 | [0x100] = (STATE_ERROR | ACTION_ERROR), |
||
177 | [0x101] = ACTION_ERROR |
||
178 | }, |
||
179 | [STATE_KEYDQBSX2] = { |
||
180 | HEX_INDICES(STATE_KEYDQ | ACTION_STORE_HEX2), |
||
181 | [0x100] = (STATE_ERROR | ACTION_ERROR), |
||
182 | [0x101] = ACTION_ERROR |
||
183 | }, |
||
184 | [STATE_BEQ] = { |
||
185 | [' '] = STATE_BEQ, |
||
186 | ['\f'] = STATE_BEQ, |
||
187 | ['\n'] = STATE_BEQ, |
||
188 | ['\r'] = STATE_BEQ, |
||
189 | ['\t'] = STATE_BEQ, |
||
190 | ['\v'] = STATE_BEQ, |
||
191 | ['='] = STATE_AEQ, |
||
192 | [0x100] = (STATE_ERROR | ACTION_ERROR), |
||
193 | [0x101] = ACTION_ERROR |
||
194 | }, |
||
195 | [STATE_AEQ] = { |
||
196 | [' '] = STATE_AEQ, |
||
197 | ['\f'] = STATE_AEQ, |
||
198 | ['\n'] = STATE_AEQ, |
||
199 | ['\r'] = STATE_AEQ, |
||
200 | ['\t'] = STATE_AEQ, |
||
201 | ['\v'] = STATE_AEQ, |
||
202 | ['\''] = STATE_VALSQ, |
||
203 | ['\"'] = STATE_VALDQ, |
||
204 | ['\\'] = STATE_VALBS, |
||
205 | ['='] = (STATE_ERROR | ACTION_ERROR), |
||
206 | ['#'] = (STATE_COMMENT | ACTION_VALUE), |
||
207 | [';'] = (STATE_BEGIN | ACTION_VALUE), |
||
208 | [0x100] = (STATE_VALUE | ACTION_STORE), |
||
209 | [0x101] = ACTION_VALUE |
||
210 | }, |
||
211 | [STATE_VALUE] = { |
||
212 | [' '] = (STATE_VALEND | ACTION_VALUE), |
||
213 | ['\f'] = (STATE_VALEND | ACTION_VALUE), |
||
214 | ['\n'] = (STATE_BEGIN | ACTION_VALUE), |
||
215 | ['\r'] = (STATE_VALEND | ACTION_VALUE), |
||
216 | ['\t'] = (STATE_VALEND | ACTION_VALUE), |
||
217 | ['\v'] = (STATE_VALEND | ACTION_VALUE), |
||
218 | ['\''] = STATE_VALSQ, |
||
219 | ['\"'] = STATE_VALDQ, |
||
220 | [';'] = (STATE_BEGIN | ACTION_VALUE), |
||
221 | ['='] = (STATE_ERROR | ACTION_ERROR), |
||
222 | ['#'] = (STATE_COMMENT | ACTION_VALUE), |
||
223 | ['\\'] = STATE_VALBS, |
||
224 | [0x100] = (STATE_VALUE | ACTION_STORE), |
||
225 | [0x101] = ACTION_VALUE |
||
226 | }, |
||
227 | [STATE_VALBS] = { |
||
228 | ['f'] = (STATE_VALUE | ACTION_STORE_MOD), |
||
229 | ['n'] = (STATE_VALUE | ACTION_STORE_MOD), |
||
230 | ['r'] = (STATE_VALUE | ACTION_STORE_MOD), |
||
231 | ['t'] = (STATE_VALUE | ACTION_STORE_MOD), |
||
232 | ['v'] = (STATE_VALUE | ACTION_STORE_MOD), |
||
233 | ['x'] = STATE_VALBSX1, |
||
234 | ['\n'] = STATE_VALUE, |
||
235 | [0x100] = (STATE_VALUE | ACTION_STORE), |
||
236 | [0x101] = ACTION_ERROR |
||
237 | }, |
||
238 | [STATE_VALBSX1] = { |
||
239 | HEX_INDICES(STATE_VALBSX2 | ACTION_STORE_HEX1), |
||
240 | [0x100] = (STATE_ERROR | ACTION_ERROR), |
||
241 | [0x101] = ACTION_ERROR |
||
242 | }, |
||
243 | [STATE_VALBSX2] = { |
||
244 | HEX_INDICES(STATE_VALUE | ACTION_STORE_HEX2), |
||
245 | [0x100] = (STATE_ERROR | ACTION_ERROR), |
||
246 | [0x101] = ACTION_ERROR |
||
247 | }, |
||
248 | [STATE_VALSQ] = { |
||
249 | ['\''] = STATE_VALUE, |
||
250 | [0x100] = (STATE_VALSQ | ACTION_STORE), |
||
251 | [0x101] = ACTION_ERROR |
||
252 | }, |
||
253 | [STATE_VALDQ] = { |
||
254 | ['"'] = STATE_VALUE, |
||
255 | ['\\'] = STATE_VALDQBS, |
||
256 | [0x100] = (STATE_VALDQ | ACTION_STORE), |
||
257 | [0x101] = ACTION_ERROR |
||
258 | }, |
||
259 | [STATE_VALDQBS] = { |
||
260 | ['f'] = (STATE_VALDQ | ACTION_STORE_MOD), |
||
261 | ['n'] = (STATE_VALDQ | ACTION_STORE_MOD), |
||
262 | ['r'] = (STATE_VALDQ | ACTION_STORE_MOD), |
||
263 | ['t'] = (STATE_VALDQ | ACTION_STORE_MOD), |
||
264 | ['v'] = (STATE_VALDQ | ACTION_STORE_MOD), |
||
265 | ['x'] = STATE_VALDQBSX1, |
||
266 | ['\n'] = STATE_VALDQ, |
||
267 | [0x100] = (STATE_VALDQ | ACTION_STORE), |
||
268 | [0x101] = ACTION_ERROR |
||
269 | }, |
||
270 | [STATE_VALDQBSX1] = { |
||
271 | HEX_INDICES(STATE_VALDQBSX2 | ACTION_STORE_HEX1), |
||
272 | [0x100] = (STATE_ERROR | ACTION_ERROR), |
||
273 | [0x101] = ACTION_ERROR |
||
274 | }, |
||
275 | [STATE_VALDQBSX2] = { |
||
276 | HEX_INDICES(STATE_VALDQ | ACTION_STORE_HEX2), |
||
277 | [0x100] = (STATE_ERROR | ACTION_ERROR), |
||
278 | [0x101] = ACTION_ERROR |
||
279 | }, |
||
280 | [STATE_VALEND] = { |
||
281 | [' '] = STATE_VALEND, |
||
282 | ['\f'] = STATE_VALEND, |
||
283 | ['\n'] = STATE_BEGIN, |
||
284 | ['\r'] = STATE_VALEND, |
||
285 | ['\t'] = STATE_VALEND, |
||
286 | ['\v'] = STATE_VALEND, |
||
287 | [';'] = STATE_BEGIN, |
||
288 | ['#'] = STATE_COMMENT, |
||
289 | [0x100] = (STATE_ERROR | ACTION_ERROR), |
||
290 | [0x101] = 0 |
||
291 | } |
||
292 | }; |
||
293 | static const unsigned char cv[] = { |
||
294 | ['f'] = '\f', ['n'] = '\n', ['r'] = '\r', |
||
295 | ['t'] = '\t', ['v'] = '\v' |
||
296 | }; |
||
297 | static const unsigned char hb[] = { |
||
298 | ['0'] = 0x0, ['1'] = 0x1, ['2'] = 0x2, ['3'] = 0x3, |
||
299 | ['4'] = 0x4, ['5'] = 0x5, ['6'] = 0x6, ['7'] = 0x7, |
||
300 | ['8'] = 0x8, ['9'] = 0x9, ['a'] = 0xa, ['b'] = 0xb, |
||
301 | ['c'] = 0xc, ['d'] = 0xd, ['e'] = 0xe, ['f'] = 0xf, |
||
302 | ['A'] = 0xa, ['B'] = 0xb, ['C'] = 0xc, ['D'] = 0xd, |
||
303 | ['E'] = 0xe, ['F'] = 0xf |
||
304 | }; |
||
305 | size_t i; |
||
306 | |||
307 | assert(sizeof(unsigned int) >= 4); |
||
308 | assert(ckvp != NULL); |
||
309 | assert(in != NULL); |
||
310 | if (ckvp->state != CKVP_NONE) { |
||
311 | ckvp->out_size = 0; |
||
312 | ckvp->state = CKVP_NONE; |
||
313 | } |
||
314 | if (ckvp->internal & 0x00010000) { |
||
315 | ++(ckvp->line); |
||
316 | ckvp->column = 1; |
||
317 | } |
||
318 | else if (ckvp->internal & 0x00020000) |
||
319 | ++(ckvp->column); |
||
320 | ckvp->internal &= ~(0x00030000); |
||
321 | if (size == 0) { |
||
322 | assert((ckvp->internal & 0x00ff) != 0x00); |
||
323 | assert((ckvp->internal & 0x00ff) <= STATE_VALEND); |
||
324 | if (st[(ckvp->internal & 0x00ff)][0x101] & ACTION_ERROR) |
||
325 | ckvp->state = CKVP_ERROR; |
||
326 | else if (st[(ckvp->internal & 0x00ff)][0x101] & ACTION_VALUE) |
||
327 | ckvp->state = CKVP_OUT_VALUE; |
||
328 | return 0; |
||
329 | } |
||
330 | for (i = 0; (i < size); ++i) { |
||
331 | unsigned char c = in[i]; |
||
332 | unsigned int newst; |
||
333 | |||
334 | assert((ckvp->internal & 0x00ff) != 0x00); |
||
335 | assert((ckvp->internal & 0x00ff) <= STATE_VALEND); |
||
336 | if ((newst = st[(ckvp->internal & 0x00ff)][(c & 0xff)]) == 0) |
||
337 | newst = st[(ckvp->internal & 0x00ff)][0x100]; |
||
338 | ckvp->internal = ((ckvp->internal & 0xffff0000) | newst); |
||
339 | assert(newst != 0); |
||
340 | if (newst & 0x0f00) { |
||
341 | if (newst & ACTION_ERROR) |
||
342 | ckvp->state = CKVP_ERROR; |
||
343 | else if (newst & ACTION_KEY) |
||
344 | ckvp->state = CKVP_OUT_KEY; |
||
345 | else if (newst & ACTION_VALUE) |
||
346 | ckvp->state = CKVP_OUT_VALUE; |
||
347 | goto endnl; |
||
348 | } |
||
349 | if (newst & 0xf000) { |
||
350 | if (newst & ACTION_STORE_HEX1) { |
||
351 | ckvp->internal &= ~(0x00f00000); |
||
352 | ckvp->internal |= (hb[c] << 20); |
||
353 | continue; |
||
354 | } |
||
355 | else if (newst & ACTION_STORE_HEX2) |
||
356 | c = (((ckvp->internal >> 16) & 0xf0) | hb[c]); |
||
357 | else if (newst & ACTION_STORE_MOD) |
||
358 | c = cv[c]; |
||
359 | if (ckvp->out_size == CKVP_OUT_SIZE) { |
||
360 | ckvp->out[0] = c; |
||
361 | ckvp->out_size = 1; |
||
362 | } |
||
363 | else |
||
364 | ckvp->out[((ckvp->out_size)++)] = c; |
||
365 | if (ckvp->out_size == CKVP_OUT_SIZE) { |
||
366 | ckvp->state = CKVP_OUT_FULL; |
||
367 | goto endnl; |
||
368 | } |
||
369 | } |
||
370 | if (c == '\n') { |
||
371 | ++(ckvp->line); |
||
372 | ckvp->column = 1; |
||
373 | } |
||
374 | else |
||
375 | ++(ckvp->column); |
||
376 | continue; |
||
377 | endnl: |
||
378 | if (c == '\n') |
||
379 | ckvp->internal |= 0x00010000; |
||
380 | else |
||
381 | ckvp->internal |= 0x00020000; |
||
382 | return ++i; |
||
383 | } |
||
384 | return size; |
||
385 | }><>=>>=>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> |