0,0 → 1,569 |
/* |
* The information in this document is subject to change |
* without notice and should not be construed as a commitment |
* by Digital Equipment Corporation or by DECUS. |
* |
* Neither Digital Equipment Corporation, DECUS, nor the authors |
* assume any responsibility for the use or reliability of this |
* document or the described software. |
* |
* Copyright (C) 1980, DECUS |
* |
* General permission to copy or modify, but not for profit, is |
* hereby granted, provided that the above copyright notice is |
* included and reference made to the fact that reproduction |
* privileges were granted by DECUS. |
*/ |
#include <stdio.h> |
#include <stdlib.h> |
#include <ctype.h> // tolower() |
|
/* |
* grep |
* |
* Runs on the Decus compiler or on vms, On vms, define as: |
* grep :== "$disk:[account]grep" (native) |
* grep :== "$disk:[account]grep grep" (Decus) |
* See below for more information. |
*/ |
|
char *documentation[] = { |
"grep searches a file for a given pattern. Execute by", |
" grep [flags] regular_expression file_list\n", |
"Flags are single characters preceded by '-':", |
" -c Only a count of matching lines is printed", |
" -f Print file name for matching lines switch, see below", |
" -n Each line is preceded by its line number", |
" -v Only print non-matching lines\n", |
"The file_list is a list of files (wildcards are acceptable on RSX modes).", |
"\nThe file name is normally printed if there is a file given.", |
"The -f flag reverses this action (print name no file, not if more).\n", |
0 }; |
|
char *patdoc[] = { |
"The regular_expression defines the pattern to search for. Upper- and", |
"lower-case are always ignored. Blank lines never match. The expression", |
"should be quoted to prevent file-name translation.", |
"x An ordinary character (not mentioned below) matches that character.", |
"'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.", |
"'^' A circumflex at the beginning of an expression matches the", |
" beginning of a line.", |
"'$' A dollar-sign at the end of an expression matches the end of a line.", |
"'.' A period matches any character except \"new-line\".", |
"':a' A colon matches a class of characters described by the following", |
"':d' character. \":a\" matches any alphabetic, \":d\" matches digits,", |
"':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and", |
"': ' other control characters, such as new-line.", |
"'*' An expression followed by an asterisk matches zero or more", |
" occurrences of that expression: \"fo*\" matches \"f\", \"fo\"", |
" \"foo\", etc.", |
"'+' An expression followed by a plus sign matches one or more", |
" occurrences of that expression: \"fo+\" matches \"fo\", etc.", |
"'-' An expression followed by a minus sign optionally matches", |
" the expression.", |
"'[]' A string enclosed in square brackets matches any character in", |
" that string, but no others. If the first character in the", |
" string is a circumflex, the expression matches any character", |
" except \"new-line\" and the characters in the string. For", |
" example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"", |
" matches \"abc\" but not \"axb\". A range of characters may be", |
" specified by two characters separated by \"-\". Note that,", |
" [a-z] matches alphabetics, while [z-a] never matches.", |
"The concatenation of regular expressions is a regular expression.", |
0}; |
|
#define LMAX 512 |
#define PMAX 256 |
|
#define CHAR 1 |
#define BOL 2 |
#define EOL 3 |
#define ANY 4 |
#define CLASS 5 |
#define NCLASS 6 |
#define STAR 7 |
#define PLUS 8 |
#define MINUS 9 |
#define ALPHA 10 |
#define DIGIT 11 |
#define NALPHA 12 |
#define PUNCT 13 |
#define RANGE 14 |
#define ENDPAT 15 |
|
int cflag=0, fflag=0, nflag=0, vflag=0, nfile=0, debug=0; |
|
char *pp, lbuf[LMAX], pbuf[PMAX]; |
|
char *cclass(); |
char *pmatch(); |
void store(int); |
void error(char *); |
void badpat(char *, char *, char *); |
int match(void); |
|
|
/*** Display a file name *******************************/ |
void file(char *s) |
{ |
printf("File %s:\n", s); |
} |
|
/*** Report unopenable file ****************************/ |
void cant(char *s) |
{ |
fprintf(stderr, "%s: cannot open\n", s); |
} |
|
/*** Give good help ************************************/ |
void help(char **hp) |
{ |
char **dp; |
|
for (dp = hp; *dp; ++dp) |
printf("%s\n", *dp); |
} |
|
/*** Display usage summary *****************************/ |
void usage(char *s) |
{ |
fprintf(stderr, "?GREP-E-%s\n", s); |
fprintf(stderr, |
"Usage: grep [-cfnv] pattern [file ...]. grep ? for help\n"); |
exit(1); |
} |
|
/*** Compile the pattern into global pbuf[] ************/ |
void compile(char *source) |
{ |
char *s; /* Source string pointer */ |
char *lp; /* Last pattern pointer */ |
int c; /* Current character */ |
int o; /* Temp */ |
char *spp; /* Save beginning of pattern */ |
|
s = source; |
if (debug) |
printf("Pattern = \"%s\"\n", s); |
pp = pbuf; |
while (c = *s++) { |
/* |
* STAR, PLUS and MINUS are special. |
*/ |
if (c == '*' || c == '+' || c == '-') { |
if (pp == pbuf || |
(o=pp[-1]) == BOL || |
o == EOL || |
o == STAR || |
o == PLUS || |
o == MINUS) |
badpat("Illegal occurrence op.", source, s); |
store(ENDPAT); |
store(ENDPAT); |
spp = pp; /* Save pattern end */ |
while (--pp > lp) /* Move pattern down */ |
*pp = pp[-1]; /* one byte */ |
*pp = (c == '*') ? STAR : |
(c == '-') ? MINUS : PLUS; |
pp = spp; /* Restore pattern end */ |
continue; |
} |
/* |
* All the rest. |
*/ |
lp = pp; /* Remember start */ |
switch(c) { |
|
case '^': |
store(BOL); |
break; |
|
case '$': |
store(EOL); |
break; |
|
case '.': |
store(ANY); |
break; |
|
case '[': |
s = cclass(source, s); |
break; |
|
case ':': |
if (*s) { |
switch(tolower(c = *s++)) { |
|
case 'a': |
case 'A': |
store(ALPHA); |
break; |
|
case 'd': |
case 'D': |
store(DIGIT); |
break; |
|
case 'n': |
case 'N': |
store(NALPHA); |
break; |
|
case ' ': |
store(PUNCT); |
break; |
|
default: |
badpat("Unknown : type", source, s); |
|
} |
break; |
} |
else badpat("No : type", source, s); |
|
case '\\': |
if (*s) |
c = *s++; |
|
default: |
store(CHAR); |
store(tolower(c)); |
} |
} |
store(ENDPAT); |
store(0); /* Terminate string */ |
if (debug) { |
for (lp = pbuf; lp < pp;) { |
if ((c = (*lp++ & 0377)) < ' ') |
printf("\\%o ", c); |
else printf("%c ", c); |
} |
printf("\n"); |
} |
} |
|
/*** Compile a class (within []) ***********************/ |
char *cclass(char *source, char *src) |
/* char *source; // Pattern start -- for error msg. */ |
/* char *src; // Class start */ |
{ |
char *s; /* Source pointer */ |
char *cp; /* Pattern start */ |
int c; /* Current character */ |
int o; /* Temp */ |
|
s = src; |
o = CLASS; |
if (*s == '^') { |
++s; |
o = NCLASS; |
} |
store(o); |
cp = pp; |
store(0); /* Byte count */ |
while ((c = *s++) && c!=']') { |
if (c == '\\') { /* Store quoted char */ |
if ((c = *s++) == '\0') /* Gotta get something */ |
badpat("Class terminates badly", source, s); |
else store(tolower(c)); |
} |
else if (c == '-' && |
(pp - cp) > 1 && *s != ']' && *s != '\0') { |
c = pp[-1]; /* Range start */ |
pp[-1] = RANGE; /* Range signal */ |
store(c); /* Re-store start */ |
c = *s++; /* Get end char and*/ |
store(tolower(c)); /* Store it */ |
} |
else { |
store(tolower(c)); /* Store normal char */ |
} |
} |
if (c != ']') |
badpat("Unterminated class", source, s); |
if ((c = (pp - cp)) >= 256) |
badpat("Class too large", source, s); |
if (c == 0) |
badpat("Empty class", source, s); |
*cp = c; |
return(s); |
} |
|
/*** Store an entry in the pattern buffer **************/ |
void store(int op) |
{ |
if (pp >= &pbuf[PMAX]) |
error("Pattern too complex\n"); |
*pp++ = op; |
} |
|
/*** Report a bad pattern specification ****************/ |
void badpat(char *message, char *source, char *stop) |
/* char *message; // Error message */ |
/* char *source; // Pattern start */ |
/* char *stop; // Pattern end */ |
{ |
fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source); |
fprintf(stderr, "-GREP-E-Stopped at byte %ld, '%c'\n", |
stop-source, stop[-1]); |
error("?GREP-E-Bad pattern\n"); |
} |
|
/*** Scan the file for the pattern in pbuf[] ***********/ |
void grep(FILE *fp, char *fn) |
/* FILE *fp; // File to process */ |
/* char *fn; // File name (for -f option) */ |
{ |
int lno, count, m; |
|
lno = 0; |
count = 0; |
while (fgets(lbuf, LMAX, fp)) { |
++lno; |
m = match(); |
if ((m && !vflag) || (!m && vflag)) { |
++count; |
if (!cflag) { |
if (fflag && fn) { |
file(fn); |
fn = 0; |
} |
if (nflag) |
printf("%d\t", lno); |
printf("%s\n", lbuf); |
} |
} |
} |
if (cflag) { |
if (fflag && fn) |
file(fn); |
printf("%d\n", count); |
} |
} |
|
/*** Match line (lbuf) with pattern (pbuf) return 1 if match ***/ |
int match() |
{ |
char *l; /* Line pointer */ |
|
for (l = lbuf; *l; ++l) { |
if (pmatch(l, pbuf)) |
return(1); |
} |
return(0); |
} |
|
/*** Match partial line with pattern *******************/ |
char *pmatch(char *line, char *pattern) |
/* char *line; // (partial) line to match */ |
/* char *pattern; // (partial) pattern to match */ |
{ |
char *l; /* Current line pointer */ |
char *p; /* Current pattern pointer */ |
char c; /* Current character */ |
char *e; /* End for STAR and PLUS match */ |
int op; /* Pattern operation */ |
int n; /* Class counter */ |
char *are; /* Start of STAR match */ |
|
l = line; |
if (debug > 1) |
printf("pmatch(\"%s\")\n", line); |
p = pattern; |
while ((op = *p++) != ENDPAT) { |
if (debug > 1) |
printf("byte[%ld] = 0%o, '%c', op = 0%o\n", |
l-line, *l, *l, op); |
switch(op) { |
|
case CHAR: |
if (tolower(*l++) != *p++) |
return(0); |
break; |
|
case BOL: |
if (l != lbuf) |
return(0); |
break; |
|
case EOL: |
if (*l != '\0') |
return(0); |
break; |
|
case ANY: |
if (*l++ == '\0') |
return(0); |
break; |
|
case DIGIT: |
if ((c = *l++) < '0' || (c > '9')) |
return(0); |
break; |
|
case ALPHA: |
c = tolower(*l++); |
if (c < 'a' || c > 'z') |
return(0); |
break; |
|
case NALPHA: |
c = tolower(*l++); |
if (c >= 'a' && c <= 'z') |
break; |
else if (c < '0' || c > '9') |
return(0); |
break; |
|
case PUNCT: |
c = *l++; |
if (c == 0 || c > ' ') |
return(0); |
break; |
|
case CLASS: |
case NCLASS: |
c = tolower(*l++); |
n = *p++ & 0377; |
do { |
if (*p == RANGE) { |
p += 3; |
n -= 2; |
if (c >= p[-2] && c <= p[-1]) |
break; |
} |
else if (c == *p++) |
break; |
} while (--n > 1); |
if ((op == CLASS) == (n <= 1)) |
return(0); |
if (op == CLASS) |
p += n - 2; |
break; |
|
case MINUS: |
e = pmatch(l, p); /* Look for a match */ |
while (*p++ != ENDPAT); /* Skip over pattern */ |
if (e) /* Got a match? */ |
l = e; /* Yes, update string */ |
break; /* Always succeeds */ |
|
case PLUS: /* One or more ... */ |
if ((l = pmatch(l, p)) == 0) |
return(0); /* Gotta have a match */ |
case STAR: /* Zero or more ... */ |
are = l; /* Remember line start */ |
while (*l && (e = pmatch(l, p))) |
l = e; /* Get longest match */ |
while (*p++ != ENDPAT); /* Skip over pattern */ |
while (l >= are) { /* Try to match rest */ |
if (e = pmatch(l, p)) |
return(e); |
--l; /* Nope, try earlier */ |
} |
return(0); /* Nothing else worked */ |
|
default: |
printf("Bad op code %d\n", op); |
error("Cannot happen -- match\n"); |
} |
} |
return(l); |
} |
|
/*** Report an error ***********************************/ |
void error(char *s) |
{ |
fprintf(stderr, "%s", s); |
exit(1); |
} |
|
/*** Main program - parse arguments & grep *************/ |
int main(int argc, char **argv) |
{ |
char *p; |
int c, i; |
int gotpattern; |
|
FILE *f; |
|
if (argc <= 1) |
usage("No arguments"); |
if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) { |
help(documentation); |
help(patdoc); |
return 0; |
} |
nfile = argc-1; |
gotpattern = 0; |
for (i=1; i < argc; ++i) { |
p = argv[i]; |
if (*p == '-') { |
++p; |
while (c = *p++) { |
switch(tolower(c)) { |
|
case '?': |
help(documentation); |
break; |
|
case 'C': |
case 'c': |
++cflag; |
break; |
|
case 'D': |
case 'd': |
++debug; |
break; |
|
case 'F': |
case 'f': |
++fflag; |
break; |
|
case 'n': |
case 'N': |
++nflag; |
break; |
|
case 'v': |
case 'V': |
++vflag; |
break; |
|
default: |
usage("Unknown flag"); |
} |
} |
argv[i] = 0; |
--nfile; |
} else if (!gotpattern) { |
compile(p); |
argv[i] = 0; |
++gotpattern; |
--nfile; |
} |
} |
if (!gotpattern) |
usage("No pattern"); |
if (nfile == 0) |
// grep(stdin, 0); |
; |
else { |
fflag = fflag ^ (nfile > 0); |
for (i=1; i < argc; ++i) { |
if (p = argv[i]) { |
if ((f=fopen(p, "r")) == NULL) |
cant(p); |
else { |
grep(f, p); |
fclose(f); |
} |
} |
} |
} |
return 0; |
} |
|
/* vim: set expandtab ts=4 sw=3 sts=3 tw=80 :*/ |