0,0 → 1,317 |
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ |
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
/*
 - split - divide a string into fields, like awk split()
 = int split(char *string, char *fields[], int nfields, char *sep);
 */
/*
 * The string is cut up IN PLACE: the delimiter that ends each stored field
 * is overwritten with a NUL and fields[] receives pointers into string.
 *
 * sep selects the splitting rule:
 *	""	fields are separated by runs of blanks and tabs; leading and
 *		trailing white space is ignored
 *	"c"	every occurrence of c ends a field, so adjacent separators
 *		produce empty fields
 *	"ab.."	any run of characters drawn from sep is one delimiter; a
 *		leading or trailing delimiter produces an empty field
 *
 * At most nfields pointers are stored.  Fields beyond that are counted but
 * neither stored nor terminated, so the last stored field keeps the rest of
 * the string.  If nfields <= 0 nothing is stored and the string is left
 * untouched; the fields are only counted.
 *
 * Returns the total number of fields found, which may exceed nfields;
 * 0 if there is nothing to split.
 */
int				/* number of fields, including overflow */
split(string, fields, nfields, sep)
char *string;
char *fields[];			/* list is not NULL-terminated */
int nfields;			/* number of entries available in fields[] */
char *sep;			/* "" white, "c" single char, "ab" [ab]+ */
{
	register char *p = string;
	register char c;			/* latest character */
	register char sepc = sep[0];
	register char sepc2;
	register int fn;
	register char **fp = fields;
	register char *sepp;
	register int trimtrail;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		if (nfields > 0) {
			/* fn counts down the free slots left in fields[] */
			fn = nfields;
			for (;;) {
				*fp++ = p;
				fn--;
				if (fn == 0)
					break;
				while ((c = *p++) != sepc)
					if (c == '\0')
						return(nfields - fn);
				*(p-1) = '\0';
			}
			/* we have overflowed the fields vector -- just count them */
			fn = nfields;
		} else {
			/* no room at all: the field starting at p is counted, not stored */
			fn = 1;
		}
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		if (nfields > 0) {
			fn = nfields;
			for (;;) {
				*fp++ = p;
				fn--;
				while ((c = *p++) != sepc && c != sepc2)
					if (c == '\0') {
						/* trailing white space made an empty last field: uncount it */
						if (trimtrail && **(fp-1) == '\0')
							fn++;
						return(nfields - fn);
					}
				if (fn == 0)
					break;
				*(p-1) = '\0';
				while ((c = *p++) == sepc || c == sepc2)
					continue;
				p--;
			}
			/* we have overflowed the fields vector -- just count them */
			fn = nfields;
		} else {
			/* no room at all: scan over the first field without storing it */
			fn = 1;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				/*
				 * The last field counted was the empty one after
				 * the trailing white space.  If it was the only
				 * overflow, the last stored field ends here.
				 */
				if (nfields > 0 && fn == nfields+1)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}
|
#ifdef TEST_SPLIT |
|
|
/*
 * test program
 *	pgm		runs regression
 *	pgm sep		splits stdin lines by sep
 *	pgm str sep	splits str by sep
 *	pgm str sep n	splits str by sep n times
 *	pgm str sep n x	only copies str n times, without splitting
 *			(presumably a baseline for timing the case above)
 */
int dosplit();			/* defined further down in this file */
int regress();

int
main(argc, argv)
int argc;
char *argv[];
{
	char buf[512];
	register int n;
#	define	MNF	10
	char *fields[MNF];

	/* the repeat modes copy argv[1] into buf, so it has to fit */
	if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
		fprintf(stderr, "%s: string too long (max %d characters)\n",
			argv[0], (int)sizeof(buf) - 1);
		exit(1);
	}

	if (argc > 4)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
		}
	else if (argc > 3)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		(void) dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/*
			 * Stomp the newline if there is one.  A final line
			 * without a newline, or an over-long line, must not
			 * lose its last character.
			 */
			buf[strcspn(buf, "\n")] = '\0';
			(void) dosplit(buf, argv[1]);
		}
	else
		(void) regress();

	exit(0);
}
|
int print();			/* defined further down in this file */

/*
 - dosplit - split one string into at most NF stored fields and print them
 *
 * string is modified in place by split().  Always returns 0.
 */
int
dosplit(string, seps)
char *string;
char *seps;
{
#	define	NF	5
	char *fields[NF];
	register int nf;

	nf = split(string, fields, NF, seps);
	(void) print(nf, NF, fields);
	return(0);
}
|
/*
 - print - show a split() result on stdout as one line
 *
 * Output is the field count, a colon and a tab, then the stored fields in
 * double quotes separated by ", ".  Only the first min(nf, nfp) entries of
 * fields[] are read.  The line is always newline-terminated, including
 * when nf is 0 or when nf exceeds nfp.  Always returns 0.
 */
int
print(nf, nfp, fields)
int nf;				/* count returned by split() */
int nfp;			/* number of entries fields[] can hold */
char *fields[];
{
	register int fn;
	register int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("%s\"%s\"", (fn > 0) ? ", " : "", fields[fn]);
	printf("\n");
	return(0);
}
|
/*
 * Regression table: split str by seps into RNF slots, expect nf fields
 * in total, of which the first min(nf, RNF) are listed in fi.
 * A NULL str ends the table.
 *
 * Runs of spaces inside the strings are significant: with a single-character
 * separator every extra space is an empty field, and the "  " group relies
 * on a two-character separator to reach the run-collapsing code.
 */
#define	RNF	5		/* some table entries know this */
struct {
	char *str;
	char *seps;
	int nf;
	char *fi[RNF];
} tests[] = {
	/* single separator: each one delimits, so empty fields appear */
	{ "",			" ",	0,	{ "" } },
	{ " ",			" ",	2,	{ "", "" } },
	{ "x",			" ",	1,	{ "x" } },
	{ "xy",			" ",	1,	{ "xy" } },
	{ "x y",		" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",		" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two distinct separators */
	{ "",			" _",	0,	{ "" } },
	{ " ",			" _",	2,	{ "", "" } },
	{ "x",			" _",	1,	{ "x" } },
	{ "x y",		" _",	2,	{ "x", "y" } },
	{ "ab _ cd",		" _",	2,	{ "ab", "cd" } },
	{ " a_b c ",		" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separators */
	{ "",			" _~",	0,	{ "" } },
	{ " ",			" _~",	2,	{ "", "" } },
	{ "x",			" _~",	1,	{ "x" } },
	{ "x y",		" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",		" _~",	2,	{ "ab", "cd" } },
	{ " a_b c~",		" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",		" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separators */
	{ "",			" _~-",	0,	{ "" } },
	{ " ",			" _~-",	2,	{ "", "" } },
	{ "x",			" _~-",	1,	{ "x" } },
	{ "x y",		" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",		" _~-",	2,	{ "ab", "cd" } },
	{ " a_b c~",		" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",		" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/*
	 * Same strings as the first group, but the separator is two spaces,
	 * which selects the two-separator code: runs of spaces collapse.
	 */
	{ "",			"  ",	0,	{ "" } },
	{ " ",			"  ",	2,	{ "", "" } },
	{ "x",			"  ",	1,	{ "x" } },
	{ "xy",			"  ",	1,	{ "xy" } },
	{ "x y",		"  ",	2,	{ "x", "y" } },
	{ "abc def  g ",	"  ",	4,	{ "abc", "def", "g", "" } },
	{ "  a bcd",		"  ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	"  ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		"  ",	6,	{ "", "a", "b", "c", "d " } },

	/* white space: blanks and tabs, leading and trailing ones ignored */
	{ "",			"",	0,	{ "" } },
	{ " ",			"",	0,	{ "" } },
	{ "x",			"",	1,	{ "x" } },
	{ "xy",			"",	1,	{ "xy" } },
	{ "x y",		"",	2,	{ "x", "y" } },
	{ "abc def g ",		"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",		"",	2,	{ "a", "bcd" } },
	{ " a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",		"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	{ NULL,			NULL,	0,	{ NULL } },
};
|
regress() |
{ |
char buf[512]; |
register int n; |
char *fields[RNF+1]; |
register int nf; |
register int i; |
register int printit; |
register char *f; |
|
for (n = 0; tests[n].str != NULL; n++) { |
(void) strcpy(buf, tests[n].str); |
fields[RNF] = NULL; |
nf = split(buf, fields, RNF, tests[n].seps); |
printit = 0; |
if (nf != tests[n].nf) { |
printf("split `%s' by `%s' gave %d fields, not %d\n", |
tests[n].str, tests[n].seps, nf, tests[n].nf); |
printit = 1; |
} else if (fields[RNF] != NULL) { |
printf("split() went beyond array end\n"); |
printit = 1; |
} else { |
for (i = 0; i < nf && i < RNF; i++) { |
f = fields[i]; |
if (f == NULL) |
f = "(NULL)"; |
if (strcmp(f, tests[n].fi[i]) != 0) { |
printf("split `%s' by `%s', field %d is `%s', not `%s'\n", |
tests[n].str, tests[n].seps, |
i, fields[i], tests[n].fi[i]); |
printit = 1; |
} |
} |
} |
if (printit) |
print(nf, RNF, fields); |
} |
} |
#endif |