Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
9837 turbocat 1
/**
 * @file
 * The following is a full-featured parser for configuration files using
 * the basic format "key = value".
 *
 * Well, it's big, but it can properly manage spaces, empty lines,
 * single and double-quoted strings, hex numbers, comments, semicolons
 * and more. It also happens to be much more robust than the original one.
 *
 * @author zamaz
 */
12
 
13
#include <assert.h>
#include <stddef.h>
#include "ckvp.h"
16
 
17
/**
 * Parser states and transition actions.
 *
 * A state-table entry is "next state | action flags": STATE_* values occupy
 * the low byte (they are read back with a 0x00ff mask) and ACTION_* flags
 * occupy bits 8-15. States start at 1 so that a zero table entry can mean
 * "no explicit transition for this character".
 */
enum {
	STATE_ERROR = 1,
	STATE_BEGIN,     /**< initial state */
	STATE_COMMENT,   /**< currently in a comment */
	STATE_KEY,       /**< (key) currently in a key */
	STATE_KEYBS,     /**< (key) backslash */
	STATE_KEYBSX1,   /**< (key) first character of a hex value (\\x) */
	STATE_KEYBSX2,   /**< (key) second character of a hex value (\\x) */
	STATE_KEYSQ,     /**< (key) currently in a single quoted key */
	STATE_KEYDQ,     /**< (key) currently in a double quoted key */
	STATE_KEYDQBS,   /**< (key) backslash while in double quotes */
	STATE_KEYDQBSX1, /**< (key) first value of \\x in double quotes */
	STATE_KEYDQBSX2, /**< (key) second value of \\x in double quotes */
	STATE_BEQ,       /**< before '=' between key and value */
	STATE_AEQ,       /**< after '=' between key and value */
	STATE_VALUE,     /**< (value) same as (key) things above, for values */
	STATE_VALBS,     /**< (value) backslash */
	STATE_VALBSX1,   /**< (value) first character of a hex value (\\x) */
	STATE_VALBSX2,   /**< (value) second character of a hex value (\\x) */
	STATE_VALSQ,     /**< (value) currently in a single quoted value */
	STATE_VALDQ,     /**< (value) currently in a double quoted value */
	STATE_VALDQBS,   /**< (value) backslash while in double quotes */
	STATE_VALDQBSX1, /**< (value) first value of \\x in double quotes */
	STATE_VALDQBSX2, /**< (value) second value of \\x in double quotes */
	STATE_VALEND,    /**< end of a value, ready to take a new key */
	ACTION_KEY        = 0x0100, /**< key complete */
	ACTION_VALUE      = 0x0200, /**< value complete */
	ACTION_ERROR      = 0x0400, /**< caught an error */
	ACTION_STORE      = 0x1000, /**< character must be stored as is */
	ACTION_STORE_MOD  = 0x2000, /**< store filtered character */
	ACTION_STORE_HEX1 = 0x4000, /**< store first hex digit */
	ACTION_STORE_HEX2 = 0x8000  /**< store second hex digit */
};
50
 
51
/**
 * Expands to designated initializers that assign (st) to the array slot of
 * every hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F').
 *
 * Meant to be used inside an array initializer list; the expansion has no
 * trailing comma, so the caller adds one when more initializers follow.
 */
#define	HEX_INDICES(st)						\
	['0'] = (st), ['1'] = (st), ['2'] = (st), ['3'] = (st),	\
	['4'] = (st), ['5'] = (st), ['6'] = (st), ['7'] = (st),	\
	['8'] = (st), ['9'] = (st), ['a'] = (st), ['b'] = (st),	\
	['c'] = (st), ['d'] = (st), ['e'] = (st), ['f'] = (st),	\
	['A'] = (st), ['B'] = (st), ['C'] = (st), ['D'] = (st),	\
	['E'] = (st), ['F'] = (st)
58
 
59
/**
60
 * ckvp_parse() takes the current state (ckvp), a buffer in[size] and returns
61
 * the number of characters processed.
62
 *
63
 * Each time ckvp_parse() returns, ckvp->state must be checked. If no error
64
 * occured, ckvp_parse() must be called again with the remaining characters
65
 * if any, otherwise the next input buffer.
66
 *
67
 * At the end of input, ckvp_parse() must be called with a zero size.
68
 *
69
 * This function doesn't allocate anything.
70
 *
71
 * @param[in,out] ckvp Current state.
72
 * @param size Number of characters in buffer "in".
73
 * @param in Input buffer to parse.
74
 * @return Number of characters processed.
75
 */
76
size_t ckvp_parse(ckvp_t *ckvp, size_t size, const char in[])
77
{
78
	/**
79
	 * State machine definition:
80
	 *
81
	 * st[current_state][current_character] = next state | action
82
	 *
83
	 * Special indices for current_character are:
84
	 *
85
	 * - 0x100 for action on characters not in the list
86
	 * - 0x101 for action when encountering end of input while in the
87
	 *         current state (often ACTION_ERROR)
88
	 */
89
	static const unsigned int st[][0x102] = {
90
		[STATE_ERROR] = {
91
			[0x100] = (STATE_ERROR | ACTION_ERROR),
92
			[0x101] = ACTION_ERROR
93
		},
94
		[STATE_BEGIN] = {
95
			[' '] = STATE_BEGIN,
96
			['\f'] = STATE_BEGIN,
97
			['\n'] = STATE_BEGIN,
98
			['\r'] = STATE_BEGIN,
99
			['\t'] = STATE_BEGIN,
100
			['\v'] = STATE_BEGIN,
101
			[';'] = (STATE_ERROR | ACTION_ERROR),
102
			['#'] = STATE_COMMENT,
103
			['\''] = STATE_KEYSQ,
104
			['"'] = STATE_KEYDQ,
105
			['\\'] = STATE_KEYBS,
106
			['='] = (STATE_ERROR | ACTION_ERROR),
107
			[0x100] = (STATE_KEY | ACTION_STORE),
108
			[0x101] = 0
109
		},
110
		[STATE_COMMENT] = {
111
			['\n'] = STATE_BEGIN,
112
			[0x100] = STATE_COMMENT,
113
			[0x101] = 0
114
		},
115
		[STATE_KEY] = {
116
			[' '] = (STATE_BEQ | ACTION_KEY),
117
			['\f'] = (STATE_BEQ | ACTION_KEY),
118
			['\n'] = (STATE_BEQ | ACTION_KEY),
119
			['\r'] = (STATE_BEQ | ACTION_KEY),
120
			['\t'] = (STATE_BEQ | ACTION_KEY),
121
			['\v'] = (STATE_BEQ | ACTION_KEY),
122
			['\''] = STATE_KEYSQ,
123
			['\"'] = STATE_KEYDQ,
124
			[';'] = (STATE_ERROR | ACTION_ERROR),
125
			['='] = (STATE_AEQ | ACTION_KEY),
126
			['#'] = (STATE_ERROR | ACTION_ERROR),
127
			['\\'] = STATE_KEYBS,
128
			[0x100] = (STATE_KEY | ACTION_STORE),
129
			[0x101] = ACTION_ERROR
130
		},
131
		[STATE_KEYBS] = {
132
			['f'] = (STATE_KEY | ACTION_STORE_MOD),
133
			['n'] = (STATE_KEY | ACTION_STORE_MOD),
134
			['r'] = (STATE_KEY | ACTION_STORE_MOD),
135
			['t'] = (STATE_KEY | ACTION_STORE_MOD),
136
			['v'] = (STATE_KEY | ACTION_STORE_MOD),
137
			['x'] = STATE_KEYBSX1,
138
			['\n'] = STATE_KEY,
139
			[0x100] = (STATE_KEY | ACTION_STORE),
140
			[0x101] = ACTION_ERROR
141
		},
142
		[STATE_KEYBSX1] = {
143
			HEX_INDICES(STATE_KEYBSX2 | ACTION_STORE_HEX1),
144
			[0x100] = (STATE_ERROR | ACTION_ERROR),
145
			[0x101] = ACTION_ERROR
146
		},
147
		[STATE_KEYBSX2] = {
148
			HEX_INDICES(STATE_KEY | ACTION_STORE_HEX2),
149
			[0x100] = (STATE_ERROR | ACTION_ERROR),
150
			[0x101] = ACTION_ERROR
151
		},
152
		[STATE_KEYSQ] = {
153
			['\''] = STATE_KEY,
154
			[0x100] = (STATE_KEYSQ | ACTION_STORE),
155
			[0x101] = ACTION_ERROR
156
		},
157
		[STATE_KEYDQ] = {
158
			['"'] = STATE_KEY,
159
			['\\'] = STATE_KEYDQBS,
160
			[0x100] = (STATE_KEYDQ | ACTION_STORE),
161
			[0x101] = ACTION_ERROR
162
		},
163
		[STATE_KEYDQBS] = {
164
			['f'] = (STATE_KEYDQ | ACTION_STORE_MOD),
165
			['n'] = (STATE_KEYDQ | ACTION_STORE_MOD),
166
			['r'] = (STATE_KEYDQ | ACTION_STORE_MOD),
167
			['t'] = (STATE_KEYDQ | ACTION_STORE_MOD),
168
			['v'] = (STATE_KEYDQ | ACTION_STORE_MOD),
169
			['x'] = STATE_KEYDQBSX1,
170
			['\n'] = STATE_KEYDQ,
171
			[0x100] = (STATE_KEYDQ | ACTION_STORE),
172
			[0x101] = ACTION_ERROR
173
		},
174
		[STATE_KEYDQBSX1] = {
175
			HEX_INDICES(STATE_KEYDQBSX2 | ACTION_STORE_HEX1),
176
			[0x100] = (STATE_ERROR | ACTION_ERROR),
177
			[0x101] = ACTION_ERROR
178
		},
179
		[STATE_KEYDQBSX2] = {
180
			HEX_INDICES(STATE_KEYDQ | ACTION_STORE_HEX2),
181
			[0x100] = (STATE_ERROR | ACTION_ERROR),
182
			[0x101] = ACTION_ERROR
183
		},
184
		[STATE_BEQ] = {
185
			[' '] = STATE_BEQ,
186
			['\f'] = STATE_BEQ,
187
			['\n'] = STATE_BEQ,
188
			['\r'] = STATE_BEQ,
189
			['\t'] = STATE_BEQ,
190
			['\v'] = STATE_BEQ,
191
			['='] = STATE_AEQ,
192
			[0x100] = (STATE_ERROR | ACTION_ERROR),
193
			[0x101] = ACTION_ERROR
194
		},
195
		[STATE_AEQ] = {
196
			[' '] = STATE_AEQ,
197
			['\f'] = STATE_AEQ,
198
			['\n'] = STATE_AEQ,
199
			['\r'] = STATE_AEQ,
200
			['\t'] = STATE_AEQ,
201
			['\v'] = STATE_AEQ,
202
			['\''] = STATE_VALSQ,
203
			['\"'] = STATE_VALDQ,
204
			['\\'] = STATE_VALBS,
205
			['='] = (STATE_ERROR | ACTION_ERROR),
206
			['#'] = (STATE_COMMENT | ACTION_VALUE),
207
			[';'] = (STATE_BEGIN | ACTION_VALUE),
208
			[0x100] = (STATE_VALUE | ACTION_STORE),
209
			[0x101] = ACTION_VALUE
210
		},
211
		[STATE_VALUE] = {
212
			[' '] = (STATE_VALEND | ACTION_VALUE),
213
			['\f'] = (STATE_VALEND | ACTION_VALUE),
214
			['\n'] = (STATE_BEGIN | ACTION_VALUE),
215
			['\r'] = (STATE_VALEND | ACTION_VALUE),
216
			['\t'] = (STATE_VALEND | ACTION_VALUE),
217
			['\v'] = (STATE_VALEND | ACTION_VALUE),
218
			['\''] = STATE_VALSQ,
219
			['\"'] = STATE_VALDQ,
220
			[';'] = (STATE_BEGIN | ACTION_VALUE),
221
			['='] = (STATE_ERROR | ACTION_ERROR),
222
			['#'] = (STATE_COMMENT | ACTION_VALUE),
223
			['\\'] = STATE_VALBS,
224
			[0x100] = (STATE_VALUE | ACTION_STORE),
225
			[0x101] = ACTION_VALUE
226
		},
227
		[STATE_VALBS] = {
228
			['f'] = (STATE_VALUE | ACTION_STORE_MOD),
229
			['n'] = (STATE_VALUE | ACTION_STORE_MOD),
230
			['r'] = (STATE_VALUE | ACTION_STORE_MOD),
231
			['t'] = (STATE_VALUE | ACTION_STORE_MOD),
232
			['v'] = (STATE_VALUE | ACTION_STORE_MOD),
233
			['x'] = STATE_VALBSX1,
234
			['\n'] = STATE_VALUE,
235
			[0x100] = (STATE_VALUE | ACTION_STORE),
236
			[0x101] = ACTION_ERROR
237
		},
238
		[STATE_VALBSX1] = {
239
			HEX_INDICES(STATE_VALBSX2 | ACTION_STORE_HEX1),
240
			[0x100] = (STATE_ERROR | ACTION_ERROR),
241
			[0x101] = ACTION_ERROR
242
		},
243
		[STATE_VALBSX2] = {
244
			HEX_INDICES(STATE_VALUE | ACTION_STORE_HEX2),
245
			[0x100] = (STATE_ERROR | ACTION_ERROR),
246
			[0x101] = ACTION_ERROR
247
		},
248
		[STATE_VALSQ] = {
249
			['\''] = STATE_VALUE,
250
			[0x100] = (STATE_VALSQ | ACTION_STORE),
251
			[0x101] = ACTION_ERROR
252
		},
253
		[STATE_VALDQ] = {
254
			['"'] = STATE_VALUE,
255
			['\\'] = STATE_VALDQBS,
256
			[0x100] = (STATE_VALDQ | ACTION_STORE),
257
			[0x101] = ACTION_ERROR
258
		},
259
		[STATE_VALDQBS] = {
260
			['f'] = (STATE_VALDQ | ACTION_STORE_MOD),
261
			['n'] = (STATE_VALDQ | ACTION_STORE_MOD),
262
			['r'] = (STATE_VALDQ | ACTION_STORE_MOD),
263
			['t'] = (STATE_VALDQ | ACTION_STORE_MOD),
264
			['v'] = (STATE_VALDQ | ACTION_STORE_MOD),
265
			['x'] = STATE_VALDQBSX1,
266
			['\n'] = STATE_VALDQ,
267
			[0x100] = (STATE_VALDQ | ACTION_STORE),
268
			[0x101] = ACTION_ERROR
269
		},
270
		[STATE_VALDQBSX1] = {
271
			HEX_INDICES(STATE_VALDQBSX2 | ACTION_STORE_HEX1),
272
			[0x100] = (STATE_ERROR | ACTION_ERROR),
273
			[0x101] = ACTION_ERROR
274
		},
275
		[STATE_VALDQBSX2] = {
276
			HEX_INDICES(STATE_VALDQ | ACTION_STORE_HEX2),
277
			[0x100] = (STATE_ERROR | ACTION_ERROR),
278
			[0x101] = ACTION_ERROR
279
		},
280
		[STATE_VALEND] = {
281
			[' '] = STATE_VALEND,
282
			['\f'] = STATE_VALEND,
283
			['\n'] = STATE_BEGIN,
284
			['\r'] = STATE_VALEND,
285
			['\t'] = STATE_VALEND,
286
			['\v'] = STATE_VALEND,
287
			[';'] = STATE_BEGIN,
288
			['#'] = STATE_COMMENT,
289
			[0x100] = (STATE_ERROR | ACTION_ERROR),
290
			[0x101] = 0
291
		}
292
	};
293
	static const unsigned char cv[] = {
294
		['f'] = '\f', ['n'] = '\n', ['r'] = '\r',
295
		['t'] = '\t', ['v'] = '\v'
296
	};
297
	static const unsigned char hb[] = {
298
		['0'] = 0x0, ['1'] = 0x1, ['2'] = 0x2, ['3'] = 0x3,
299
		['4'] = 0x4, ['5'] = 0x5, ['6'] = 0x6, ['7'] = 0x7,
300
		['8'] = 0x8, ['9'] = 0x9, ['a'] = 0xa, ['b'] = 0xb,
301
		['c'] = 0xc, ['d'] = 0xd, ['e'] = 0xe, ['f'] = 0xf,
302
		['A'] = 0xa, ['B'] = 0xb, ['C'] = 0xc, ['D'] = 0xd,
303
		['E'] = 0xe, ['F'] = 0xf
304
	};
305
	size_t i;
306
 
307
	assert(sizeof(unsigned int) >= 4);
308
	assert(ckvp != NULL);
309
	assert(in != NULL);
310
	if (ckvp->state != CKVP_NONE) {
311
		ckvp->out_size = 0;
312
		ckvp->state = CKVP_NONE;
313
	}
314
	if (ckvp->internal & 0x00010000) {
315
		++(ckvp->line);
316
		ckvp->column = 1;
317
	}
318
	else if (ckvp->internal & 0x00020000)
319
		++(ckvp->column);
320
	ckvp->internal &= ~(0x00030000);
321
	if (size == 0) {
322
		assert((ckvp->internal & 0x00ff) != 0x00);
323
		assert((ckvp->internal & 0x00ff) <= STATE_VALEND);
324
		if (st[(ckvp->internal & 0x00ff)][0x101] & ACTION_ERROR)
325
			ckvp->state = CKVP_ERROR;
326
		else if (st[(ckvp->internal & 0x00ff)][0x101] & ACTION_VALUE)
327
			ckvp->state = CKVP_OUT_VALUE;
328
		return 0;
329
	}
330
	for (i = 0; (i < size); ++i) {
331
		unsigned char c = in[i];
332
		unsigned int newst;
333
 
334
		assert((ckvp->internal & 0x00ff) != 0x00);
335
		assert((ckvp->internal & 0x00ff) <= STATE_VALEND);
336
		if ((newst = st[(ckvp->internal & 0x00ff)][(c & 0xff)]) == 0)
337
			newst = st[(ckvp->internal & 0x00ff)][0x100];
338
		ckvp->internal = ((ckvp->internal & 0xffff0000) | newst);
339
		assert(newst != 0);
340
		if (newst & 0x0f00) {
341
			if (newst & ACTION_ERROR)
342
				ckvp->state = CKVP_ERROR;
343
			else if (newst & ACTION_KEY)
344
				ckvp->state = CKVP_OUT_KEY;
345
			else if (newst & ACTION_VALUE)
346
				ckvp->state = CKVP_OUT_VALUE;
347
			goto endnl;
348
		}
349
		if (newst & 0xf000) {
350
			if (newst & ACTION_STORE_HEX1) {
351
				ckvp->internal &= ~(0x00f00000);
352
				ckvp->internal |= (hb[c] << 20);
353
				continue;
354
			}
355
			else if (newst & ACTION_STORE_HEX2)
356
				c = (((ckvp->internal >> 16) & 0xf0) | hb[c]);
357
			else if (newst & ACTION_STORE_MOD)
358
				c = cv[c];
359
			if (ckvp->out_size == CKVP_OUT_SIZE) {
360
				ckvp->out[0] = c;
361
				ckvp->out_size = 1;
362
			}
363
			else
364
				ckvp->out[((ckvp->out_size)++)] = c;
365
			if (ckvp->out_size == CKVP_OUT_SIZE) {
366
				ckvp->state = CKVP_OUT_FULL;
367
				goto endnl;
368
			}
369
		}
370
		if (c == '\n') {
371
			++(ckvp->line);
372
			ckvp->column = 1;
373
		}
374
		else
375
			++(ckvp->column);
376
		continue;
377
	endnl:
378
		if (c == '\n')
379
			ckvp->internal |= 0x00010000;
380
		else
381
			ckvp->internal |= 0x00020000;
382
		return ++i;
383
	}
384
	return size;
385
}