Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
1913 jaeger 1
class Token:
    # One lexical token.
    #   pos   - position of the token; the tokenizer in this file passes a
    #           (line, column) pair
    #   type  - token kind; this file emits 'symbol', 'name', 'number',
    #           'string', 'nl', 'indent' and 'dedent'
    #   val   - token payload (text of the token, indent width, or None)
    #   items - not set by the tokenizer in this file; defaults to None
    def __init__(self,pos=(0,0),type='symbol',val=None,items=None):
        self.pos,self.type,self.val,self.items=pos,type,val,items
4
 
5
def u_error(ctx,s,i):
    # Abort with a message showing the offending source line and a caret.
    #   ctx - short label for the failing stage (e.g. 'tokenize')
    #   s   - the full source text
    #   i   - (line, column) pair; line is 1-based
    # Never returns: always raises.
    y,x = i
    line = s.split('\n')[y-1]
    # Left-pad the line number: three spaces for one digit, two for two
    # digits, none for three or more.
    p = ''
    if y < 10: p += ' '
    if y < 100: p += '  '
    r = p + str(y) + ": " + line + "\n"
    # Caret line: fixed five-space prefix plus x spaces, then the marker.
    r += "     "+" "*x+"^" +'\n'
    # NOTE(review): this raises a plain string. Standard Python 3 rejects
    # that with a TypeError (message lost); presumably the target runtime
    # accepts arbitrary raise values -- confirm before changing.
    raise 'error: '+ctx+'\n'+r
14
 
15
# Characters that can start or continue a punctuation symbol.
# '|' is included because SYMBOLS lists it; without it the main loop would
# never route '|' to do_symbol and would report an error instead.
ISYMBOLS = '`-=[];,./~!@$%^&*()+{}:<>?|'
# Every keyword and operator that is emitted as a 'symbol' token.
SYMBOLS = [
    'def','class','yield','return','pass','and','or','not','in','import',
    'is','while','break','for','continue','if','else','elif','try',
    'except','raise','True','False','None','global','del','from',
    '-','+','*','**','/','%','<<','>>',
    '-=','+=','*=','/=','=','==','!=','<','>',
    '<=','>=','[',']','{','}','(',')','.',':',',',';','&','|','!',
    ]
# Opening and closing brackets; used to track bracket nesting depth.
B_BEGIN,B_END = ['[','(','{'],[']',')','}']
25
 
26
class TData:
    # Mutable state for one tokenizer run.
    def __init__(self):
        # y  - current line number, starting at 1
        # yi - index in the source of the first character of the current line
        # nl - True when the next character begins a new line
        self.y,self.yi,self.nl = 1,0,True
        # res    - list of tokens emitted so far
        # indent - stack of active indentation widths (bottom is always 0)
        # braces - current bracket nesting depth
        self.res,self.indent,self.braces = [],[0],0
    # Append a token of type t with value v at the current position.
    # self.f is not set here; do_tokenize assigns it before any add() call.
    def add(self,t,v): self.res.append(Token(self.f,t,v))
31
 
32
def clean(s):
    # Normalize line endings so the tokenizer only ever sees '\n'.
    # '\r\n' must be replaced first, otherwise it would become two newlines.
    s = s.replace('\r\n','\n')
    s = s.replace('\r','\n')
    return s
36
 
37
def tokenize(s):
    # Public entry point: turn source text s into a list of Token objects.
    # Line endings are normalized first. Any failure inside do_tokenize is
    # reported through u_error at the last recorded position (T.f).
    s = clean(s)
    try: return do_tokenize(s)
    except: u_error('tokenize',s,T.f)
41
 
42
def do_tokenize(s):
    # Scan s from start to end and return the list of tokens.
    # Each do_* helper consumes one lexical element starting at index i and
    # returns the index just past it. State lives in the global T for the
    # duration of the run and is reset to None on success.
    global T
    T,i,l = TData(),0,len(s)
    T.f = (T.y,i-T.yi+1)
    while i < l:
        # Record the (line, column) of the character about to be handled.
        c = s[i]; T.f = (T.y,i-T.yi+1)
        if T.nl: T.nl = False; i = do_indent(s,i,l)
        elif c == '\n': i = do_nl(s,i,l)
        elif c in ISYMBOLS: i = do_symbol(s,i,l)
        elif c >= '0' and c <= '9': i = do_number(s,i,l)
        elif (c >= 'a' and c <= 'z') or \
            (c >= 'A' and c <= 'Z') or c == '_':  i = do_name(s,i,l)
        elif c=='"' or c=="'": i = do_string(s,i,l)
        elif c=='#': i = do_comment(s,i,l)
        elif c == '\\' and s[i+1] == '\n':
            # Backslash-newline: join lines without emitting an 'nl' token.
            i += 2; T.y,T.yi = T.y+1,i
        elif c == ' ' or c == '\t': i += 1
        else: u_error('tokenize',s,T.f)
    # Close any indentation levels still open at end of input.
    indent(0)
    r = T.res; T = None
    return r
63
 
64
def do_nl(s,i,l):
    # Handle a newline at index i and return the index after it.
    # Inside brackets the newline is not significant, so no token is emitted.
    if not T.braces:
        T.add('nl',None)
    # Flag the start of a new line so indentation is measured next.
    i,T.nl = i+1,True
    T.y,T.yi = T.y+1,i
    return i
70
 
71
def do_indent(s,i,l):
    # Measure the leading whitespace of the line starting at index i and
    # return the index of the first non-blank character.
    # Each space or tab counts as one column.
    v = 0
    while i<l:
        c = s[i]
        if c != ' ' and c != '\t': break
        i,v = i+1,v+1
    # Blank lines, comment-only lines and lines inside brackets do not
    # change the indentation level.
    if c != '\n' and c != '#' and not T.braces: indent(v)
    return i
79
 
80
def indent(v):
    # Emit 'indent'/'dedent' tokens so the indentation stack ends at width v.
    if v == T.indent[-1]: pass
    elif v > T.indent[-1]:
        T.indent.append(v)
        T.add('indent',v)
    elif v < T.indent[-1]:
        # v must equal one of the enclosing levels; index() fails otherwise.
        n = T.indent.index(v)
        # Pop every level deeper than v; each 'dedent' carries the width
        # of the level being closed.
        while len(T.indent) > n+1:
            v = T.indent.pop()
            T.add('dedent',v)
90
 
91
 
92
def do_symbol(s,i,l):
    # Tokenize the longest known punctuation symbol starting at index i and
    # return the index just past it.
    symbols = []
    v,f,i = s[i],i,i+1
    if v in SYMBOLS: symbols.append(v)
    # Extend over the whole run of symbol characters, remembering every
    # prefix that is a known symbol.
    while i<l:
        c = s[i]
        if not c in ISYMBOLS: break
        v,i = v+c,i+1
        if v in SYMBOLS: symbols.append(v)
    # The last recorded prefix is the longest match; rewind i to just after
    # it. pop() fails on an empty list when no prefix is a known symbol.
    v = symbols.pop(); n = len(v); i = f+n
    T.add('symbol',v)
    if v in B_BEGIN: T.braces += 1
    if v in B_END: T.braces -= 1
    return i
106
 
107
def do_number(s,i,l):
    # Tokenize a numeric literal starting at index i and return the index
    # just past it. The token value is the literal's text, not a number.
    v,i,c =s[i],i+1,s[i]
    # Integer part: digits plus 'a'-'f' and 'x', so text such as 0xff is
    # consumed as one literal.
    while i<l:
        c = s[i]
        if (c < '0' or c > '9') and (c < 'a' or c > 'f') and c != 'x': break
        v,i = v+c,i+1
    # Optional fractional part: a '.' followed by decimal digits only.
    if c == '.':
        v,i = v+c,i+1
        while i<l:
            c = s[i]
            if c < '0' or c > '9': break
            v,i = v+c,i+1
    T.add('number',v)
    return i
121
 
122
def do_name(s,i,l):
    # Tokenize an identifier or keyword starting at index i and return the
    # index just past it.
    v,i =s[i],i+1
    # Continue over ASCII letters, digits and underscores.
    while i<l:
        c = s[i]
        if (c < 'a' or c > 'z') and (c < 'A' or c > 'Z') and (c < '0' or c > '9') and c != '_': break
        v,i = v+c,i+1
    # Words listed in SYMBOLS (keywords) are emitted as 'symbol' tokens.
    if v in SYMBOLS: T.add('symbol',v)
    else: T.add('name',v)
    return i
131
 
132
def do_string(s,i,l):
    # Tokenize a string literal whose opening quote is at index i and return
    # the index just past it. If the closing quote is never found, the loop
    # runs off the end and no token is emitted.
    v,q,i = '',s[i],i+1
    if (l-i) >= 5 and s[i] == q and s[i+1] == q: # """
        # Triple-quoted string: contents are taken verbatim (no escape
        # processing) and may span lines.
        i += 2
        # Stop two short of the end because the body looks ahead to s[i+2].
        while i<l-2:
            c = s[i]
            if c == q and s[i+1] == q and s[i+2] == q:
                i += 3
                T.add('string',v)
                break
            else:
                v,i = v+c,i+1
                # Keep line tracking correct across embedded newlines.
                if c == '\n': T.y,T.yi = T.y+1,i
    else:
        while i<l:
            c = s[i]
            if c == "\\":
                # Escape: \n, \r, \t and \0 are translated; any other
                # escaped character stands for itself.
                i = i+1; c = s[i]
                if c == "n": c = '\n'
                if c == "r": c = chr(13)
                if c == "t": c = "\t"
                if c == "0": c = "\0"
                v,i = v+c,i+1
            elif c == q:
                i += 1
                T.add('string',v)
                break
            else:
                v,i = v+c,i+1
    return i
162
 
163
def do_comment(s,i,l):
    # Skip a '#' comment starting at index i.
    # Returns the index of the terminating newline (left for the caller to
    # handle), or l if the comment runs to the end of the input.
    i += 1
    while i<l:
        c = s[i]
        if c == '\n': break
        i += 1
    return i
170
 
171