Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download
Views: 39598
1
"""
2
sage_parser.py
3
4
Code for parsing Sage code blocks sensibly.
5
"""
6
7
#########################################################################################
8
# Copyright (C) 2016, Sagemath Inc.
9
# #
10
# Distributed under the terms of the GNU General Public License (GPL), version 2+ #
11
# #
12
# http://www.gnu.org/licenses/ #
13
#########################################################################################
14
15
import string
16
import traceback
17
18
def get_input(prompt):
    """
    Read one statement from standard input, interactively.

    The first line is read with ``prompt``.  If it ends with a colon
    (ignoring trailing whitespace) it is treated as the start of a compound
    statement: further lines are read with the continuation prompt and
    appended until an empty line is entered or end-of-file is reached.

    Returns the accumulated text, or None if end-of-file is hit while
    reading the very first line.
    """
    try:
        text = raw_input(prompt)
    except EOFError:
        # Nothing was read at all.
        return None

    if text.rstrip().endswith(':'):
        while True:
            try:
                more = raw_input('... ')
            except EOFError:
                # EOF inside a compound statement just ends the statement.
                break
            if more == '':
                break
            text += '\n ' + more
    return text
36
37
#def strip_leading_prompts(code, prompts=['sage:', '....:', '...:', '>>>', '...']):
38
# code, literals, state = strip_string_literals(code)
39
# code2 = []
40
# for line in code.splitlines():
41
# line2 = line.lstrip()
42
# for p in prompts:
43
# if line2.startswith(p):
44
# line2 = line2[len(p):]
45
# if p[0] != '.':
46
# line2 = line2.lstrip()
47
# break
48
# code2.append(line2)
49
# code = ('\n'.join(code2))%literals
50
# return code
51
52
def preparse_code(code):
    """
    Run the Sage preparser over ``code`` and return the resulting source.

    The preparser is called with ``ignore_prompts=True``.  Sage is imported
    inside the function rather than at module import time.
    """
    from sage import all_cmdline
    preparsed = all_cmdline.preparse(code, ignore_prompts=True)
    return preparsed
55
56
def strip_string_literals(code, state=None):
    """
    Replace every string literal and comment in ``code`` by a placeholder.

    Each literal (quotes included) and each comment (from ``#`` up to, but
    not including, the end of the line) is replaced by ``%(Ln)s`` where n
    counts from 1.  Every ``%`` in the remaining code is doubled, so that
    ``template % literals`` reproduces the original text.

    INPUT:

    - code -- the source text to process.

    - state -- None, or the ``(in_quote, raw)`` pair returned by a previous
      call, used to continue inside a string left open by that call.

    OUTPUT:

    A triple ``(template, literals, state)``: the templated code, a dict
    mapping labels ``'L1'``, ``'L2'``, ... to the removed text, and the pair
    ``(in_quote, raw)`` where ``in_quote`` is False or the quote delimiter
    still open at the end of ``code``.

    Example::

        strip_string_literals("a = 'x' # hi")
        # -> ("a = %(L1)s %(L2)s", {'L1': "'x'", 'L2': '# hi'}, (False, False))
    """
    new_code = []
    literals = {}
    counter = 0
    start = q = 0
    if state is None:
        in_quote = False
        raw = False
    else:
        in_quote, raw = state
    while True:
        sig_q = code.find("'", q)
        dbl_q = code.find('"', q)
        hash_q = code.find('#', q)
        # q becomes the nearest quote of either kind, or -1 if none is left.
        q = min(sig_q, dbl_q)
        if q == -1:
            q = max(sig_q, dbl_q)
        if not in_quote and hash_q != -1 and (q == -1 or hash_q < q):
            # A comment starts before the next string literal.
            newline = code.find('\n', hash_q)
            if newline == -1:
                newline = len(code)
            counter += 1
            label = "L%s" % counter
            literals[label] = code[hash_q:newline]
            new_code.append(code[start:hash_q].replace('%', '%%'))
            new_code.append("%%(%s)s" % label)
            start = q = newline
        elif q == -1:
            # No more quotes: flush whatever is left and stop.
            if in_quote:
                counter += 1
                label = "L%s" % counter
                literals[label] = code[start:]
                new_code.append("%%(%s)s" % label)
            else:
                new_code.append(code[start:].replace('%', '%%'))
            break
        elif in_quote:
            # Is the quote at q escaped?  Count the backslashes in front of
            # it.  The bounds checks matter: without them a quote at index 0
            # (possible when continuing from ``state``) would look at the
            # *end* of the string through negative indexing.
            if q > 0 and code[q-1] == '\\':
                k = 2
                while q - k >= 0 and code[q-k] == '\\':
                    k += 1
                if k % 2 == 0:
                    # Odd number of backslashes: the quote is escaped.
                    q += 1
            if code[q:q+len(in_quote)] == in_quote:
                counter += 1
                label = "L%s" % counter
                literals[label] = code[start:q+len(in_quote)]
                new_code.append("%%(%s)s" % label)
                q += len(in_quote)
                start = q
                in_quote = False
            else:
                q += 1
        else:
            # Opening quote of a new literal.
            raw = q > 0 and code[q-1] in 'rR'
            if len(code) >= q+3 and (code[q+1] == code[q] == code[q+2]):
                in_quote = code[q]*3
            else:
                in_quote = code[q]
            new_code.append(code[start:q].replace('%', '%%'))
            start = q
            q += len(in_quote)

    return "".join(new_code), literals, (in_quote, raw)
119
120
def end_of_expr(s):
    """
    Locate the end of the expression that starts at the beginning of ``s``.

    ``s`` is code from which string literals have already been stripped.
    The expression ends at the first space or tab that is met while both
    the count of open parentheses and the count of open square brackets
    are zero.

    Returns the index just past the expression, i.e. the index of that
    whitespace character, or ``len(s)`` if there is none.
    """
    round_depth = 0
    square_depth = 0
    for pos, ch in enumerate(s):
        if ch == '(':
            round_depth += 1
        elif ch == ')':
            round_depth -= 1
        elif ch == '[':
            square_depth += 1
        elif ch == ']':
            square_depth -= 1
        elif ch in (' ', '\t') and round_depth == 0 and square_depth == 0:
            return pos
    return len(s)
145
146
# NOTE/TODO: dec_args is never pruned, so it grows for the lifetime of the
#            process.  It only holds code that was actually entered, so it
#            should stay small; since there is no way to know whether an
#            entry will still be needed later, the leak is accepted.
dec_args = {}      # dec_counter value -> (list of decorator strings, code)
dec_counter = 0    # key that divide_into_blocks will use for its next entry
152
153
# Divide the input code (a string) into blocks of code.
154
def divide_into_blocks(code):
    """
    Split ``code`` (a string) into blocks that can be executed one by one.

    Steps:

    1. String literals and comments are replaced by placeholders so the
       remaining text can be scanned naively.
    2. Lines starting with ``!`` are rewritten to the ``%sh`` decorator, and
       ``%decorator`` lines are rewritten into calls to
       ``salvus.execute_with_code_decorators``; the arguments are stored in
       the module-level ``dec_args`` under the current ``dec_counter``.  If
       anything goes wrong here, the unmodified lines are used instead.
    3. Comment-only and blank lines are dropped.
    4. Working backwards from the last line, a block is extended upwards
       while the current line starts with whitespace or while there are
       unmatched closing parentheses, brackets or braces.
    5. Consecutive blocks belonging to the same compound statement
       (try/except/finally, decorators, else/elif) are merged.

    Returns a list of ``[start, stop, text]`` lists, where ``start`` and
    ``stop`` are indices into the list of lines remaining after step 3 and
    ``text`` is the stripped block with literals substituted back in.
    """
    global dec_counter

    # Strip string literals so that parsing need not worry about them.
    template, literals, _state = strip_string_literals(code)
    lines = template.splitlines()

    # Line-level code decorators.  Keep the untouched lines as a fallback.
    untouched = list(lines)
    try:
        rewritten = []
        for line in lines:
            # Shell escape becomes the sh decorator.
            if line.lstrip().startswith('!'):
                line = line.replace('!', "%%sh ", 1)

            # NOTE: strip_string_literals maps % to %%, because %foo is used
            # for python string templating.
            if not line.lstrip().startswith('%%'):
                rewritten.append(line)
                continue

            pct = line.find("%")
            # +1 skips the space or tab that ends the decorator expression.
            cut = end_of_expr(line[pct+2:]) + pct + 2 + 1
            expr = line[cut:] % literals
            trimmed = expr.strip()
            call = '%ssalvus.execute_with_code_decorators(*_salvus_parsing.dec_args[%s])' % (line[:pct], dec_counter)

            # Special case: % at the very start of the line with nothing (or
            # only a comment) after the decorator -- the decorator then
            # applies to all of the remaining code.
            whole_rest = pct == 0 and (len(trimmed) == 0 or trimmed.startswith('#'))
            if whole_rest:
                expr = ('\n'.join(lines[len(rewritten)+1:])) % literals

            dec_args[dec_counter] = ([line[pct+2:cut] % literals], expr)
            dec_counter += 1
            rewritten.append(call)
            if whole_rest:
                break
        lines = rewritten
    except Exception:
        lines = untouched

    # Remove comment lines -- otherwise we could get empty blocks that can't
    # be exec'd (exec compile('#', '', 'single') is a syntax error), and
    # comments would confuse the block splitting below.
    comment_placeholders = set()
    for label in literals:
        if literals[label].startswith('#'):
            comment_placeholders.add(u"%%(%s)s" % label)
    lines = [ln for ln in lines if ln.strip() not in comment_placeholders]

    # Keep only lines that are not purely whitespace.
    lines = [ln for ln in lines if ln.strip()]

    # Compute the blocks, last one first.
    found = []
    last = len(lines) - 1
    while last >= 0:
        first = last
        parens = lines[first].count('(') - lines[first].count(')')
        bracks = lines[first].count('[') - lines[first].count(']')
        curlys = lines[first].count('{') - lines[first].count('}')
        while first >= 0 and ((len(lines[first]) > 0 and (lines[first][0] in string.whitespace))
                              or parens < 0 or bracks < 0 or curlys < 0):
            first -= 1
            if first >= 0:
                parens += lines[first].count('(') - lines[first].count(')')
                bracks += lines[first].count('[') - lines[first].count(']')
                curlys += lines[first].count('{') - lines[first].count('}')
        text = ('\n'.join(lines[first:])) % literals
        text = text.strip()
        if text:  # has to not be only whitespace
            found.append([first, last, text])
        lines = lines[:first]
        last = len(lines) - 1
    found.reverse()
    blocks = found

    def absorb(k):
        "Merge block k into block k-1."
        blocks[k-1][-1] += '\n' + blocks[k][-1]
        blocks[k-1][1] = blocks[k][1]
        del blocks[k]

    # Merge try/except/finally/decorator/else/elif blocks.
    k = 1
    while k < len(blocks):
        head = blocks[k][-1].lstrip()
        before = blocks[k-1][-1]
        before_head = before.lstrip()

        if head.startswith(('finally', 'except')) and before_head.startswith('try'):
            # finally/except lines after a try
            absorb(k)
        elif head.startswith(('def', '@')) and before.splitlines()[-1].lstrip().startswith('@'):
            # decorated function definitions
            absorb(k)
        elif head.startswith('else') and before_head.startswith(('if', 'while', 'for', 'try', 'elif')):
            # else can follow if *and* for *and* while (and try, elif)
            absorb(k)
        elif head.startswith('elif') and before_head.startswith('if'):
            absorb(k)
        else:
            # nothing to merge -- move on to the next block
            k += 1

    return blocks
272
273
274
275
276
############################################
277
278
# Characters allowed in an identifier, and in a dotted name.
CHARS0 = ''.join([string.ascii_letters, string.digits, '_'])
CHARS = CHARS0 + '.'


def guess_last_expression(obj):  # TODO: bad guess -- need to use a parser to go any further.
    """
    Return the longest suffix of ``obj`` made only of letters, digits,
    underscores and dots (e.g. ``'sqr'`` for ``'2+sqr'``).
    """
    cut = len(obj)
    while cut > 0 and obj[cut-1] in CHARS:
        cut -= 1
    return obj[cut:]
285
286
def is_valid_identifier(target):
    """
    Return True if ``target`` is non-empty, consists only of characters in
    CHARS0 (letters, digits, underscore) and starts with a letter or an
    underscore; return False otherwise.
    """
    if len(target) == 0:
        return False
    if any(ch not in CHARS0 for ch in target):
        return False
    return target[0] in string.ascii_letters + '_'
294
295
296
297
# Keywords from http://docs.python.org/release/2.7.2/reference/lexical_analysis.html
298
# Names offered for completion in addition to the user's namespace: every
# builtin plus the language keywords.
# ``__builtins__`` is a dict in an imported module but the builtins *module*
# when this file runs as ``__main__``, so both forms are handled here.
_builtin_completions = list(
    __builtins__ if isinstance(__builtins__, dict) else dir(__builtins__)
) + ['and', 'del', 'from', 'not', 'while', 'as', 'elif', 'global', 'or', 'with', 'assert', 'else', 'if', 'pass', 'yield', 'break', 'except', 'import', 'print', 'class', 'exec', 'in', 'raise', 'continue', 'finally', 'is', 'return', 'def', 'for', 'lambda', 'try']
299
300
def introspect(code, namespace, preparse=True):
    """
    Introspect the last expression of ``code``: tab completions, the
    docstring (expression ends in ``?`` or ``(``) or the source code
    (expression ends in ``??``).

    INPUT:

    - code -- a string containing Sage (if preparse=True) or Python code.

    - namespace -- a dictionary to complete in (we also complete using
      builtins such as 'def', 'for', etc.)

    - preparse -- a boolean

    OUTPUT:

    An object: {'result':, 'target':, 'expr':, 'status':, 'get_help':, 'get_completions':, 'get_source':}

    ``status`` is always 'ok'; if anything fails the traceback is printed
    and ``result`` is an empty list.

    NOTE: for anything but plain identifier completion, the part of
    ``code`` before the expression is executed and the object in front of
    the target is evaluated, both in ``namespace``, under a 1 second
    SIGALRM timeout.  This runs arbitrary code from ``code``.
    """
    import re

    # result: the docstring, source code, or list of completions (at
    # return, it might thus be either a list or a string)
    result = []

    # expr: the part of code that is used to do the completion, e.g.,
    # for 'a = n.m.foo', expr would be 'n.m.foo'.  It can be more complicated,
    # e.g., for '(2+3).foo.bar' it would be '(2+3).foo'.
    expr = ''

    # target: for completions, target is the part of the code that we
    # complete on in the namespace defined by the object right before
    # it, e.g., for n.m.foo, the target is "foo".  target is the empty
    # string for source code and docstrings.
    target = ''

    # When returning, exactly one of the following will be true:
    get_help = False         # getting docstring of something
    get_source = False       # getting source code of a function
    get_completions = True   # getting completions of an identifier in some namespace

    # Candidate completions.  Initialised so that a failure in the wildcard
    # branch below yields "no completions" rather than a NameError.
    v = []

    try:
        # Strip all strings from the code, replacing them by template
        # symbols; this makes parsing much easier.
        # We strip, since trailing space could cause confusion below.
        code0, literals, _state = strip_string_literals(code.strip())

        # Move i so that it points to the start of the last expression in the code.
        # (TODO: this should probably be replaced by using ast on preparsed version.  Not easy.)
        i = max([code0.rfind(t) for t in '\n;=']) + 1
        while i < len(code0) and code0[i] in string.whitespace:
            i += 1

        # Break the line in two pieces: before_expr | expr; we may
        # need before_expr in order to evaluate and make sense of
        # expr.  We also put the string literals back in, so that
        # evaluation works.
        expr = code0[i:] % literals
        before_expr = code0[:i] % literals

        chrs = set('.()[]? ')
        if not any(c in expr for c in chrs):
            # Easy case: this is just completion on a simple identifier in the namespace.
            get_help = False
            get_completions = True
            get_source = False
            target = expr
        else:
            # Now for all of the other harder cases.
            i = max([expr.rfind(s) for s in '?('])
            if i >= 1 and i == len(expr) - 1 and expr[i-1] == '?':
                # expr ends in two ?? -- source code
                get_source = True
                get_completions = False
                get_help = False
                target = ""
                obj = expr[:i-1]
            elif i == len(expr) - 1:
                # ends in ( or ? (but not ??) -- docstring
                get_help = True
                get_completions = False
                get_source = False
                target = ""
                obj = expr[:i]
            else:
                # completions (not docstrings or source)
                get_help = False
                get_completions = True
                get_source = False
                i = expr.rfind('.')
                target = expr[i+1:]
                if target == '' or is_valid_identifier(target) or ('*' in expr and '* ' not in expr):
                    # this case includes list.*end[tab]
                    obj = expr[:i]
                else:
                    # this case includes aaa=...;3 * aa[tab]
                    expr = guess_last_expression(target)
                    i = expr.rfind('.')
                    if i != -1:
                        target = expr[i+1:]
                        obj = expr[:i]
                    else:
                        target = expr

        if get_completions and target == expr:
            # Completion of a bare name against the namespace and builtins.
            j = len(expr)
            names = list(namespace.keys()) + list(_builtin_completions)
            if '*' in expr:
                # this case includes *_factors<TAB> and abc =...;3 * ab[tab]
                try:
                    pattern = expr.replace("*", ".*").replace("?", ".")
                    reg = re.compile(pattern + "$")
                    v = [x for x in names if reg.match(x)]
                    # for 2*sq[tab]
                    if len(v) == 0:
                        gle = guess_last_expression(expr)
                        j = len(gle)
                        if j > 0:
                            target = gle
                            v = [x[j:] for x in names if x.startswith(gle)]
                except Exception:
                    # Best effort: an invalid pattern just gives no matches.
                    pass
            else:
                v = [x[j:] for x in names if x.startswith(expr)]
                # for 2+sqr[tab]
                if len(v) == 0:
                    gle = guess_last_expression(expr)
                    j = len(gle)
                    if j > 0 and j < len(expr):
                        target = gle
                        v = [x[j:] for x in names if x.startswith(gle)]
        else:
            # We will try to evaluate obj.  This is dangerous and a priori
            # could take forever, so we spend at most 1 second doing this --
            # if it takes longer a signal kills the evaluation.
            # Obviously, this could in fact lock if non-interruptable code
            # is called, which should be rare.
            import signal

            O = None
            try:
                def mysig(*args):
                    raise KeyboardInterrupt
                signal.signal(signal.SIGALRM, mysig)
                signal.alarm(1)
                import sage.all_cmdline
                if before_expr.strip():
                    try:
                        setup = before_expr if not preparse else preparse_code(before_expr)
                        # eval() of a code object compiled in 'exec' mode
                        # executes it; unlike the exec statement this is
                        # valid syntax on both Python 2 and Python 3.
                        eval(compile(setup, '<string>', 'exec'), namespace)
                    except Exception:
                        # Deliberately ignored: the preceding code need not run cleanly.
                        # uncomment for debugging only
                        # traceback.print_exc()
                        pass
                # We first try to evaluate the part of the expression before the name
                try:
                    O = eval(obj if not preparse else preparse_code(obj), namespace)
                except (SyntaxError, TypeError, AttributeError):
                    # If that fails, we try on a subexpression.
                    # TODO: This will not be needed when
                    #       this code is re-written to parse using an
                    #       AST, instead of using this lame hack.
                    obj = guess_last_expression(obj)
                    try:
                        O = eval(obj if not preparse else preparse_code(obj), namespace)
                    except BaseException:
                        # Includes the KeyboardInterrupt raised on timeout.
                        pass
            finally:
                # Cancel the pending alarm and stop reacting to SIGALRM.
                signal.alarm(0)
                signal.signal(signal.SIGALRM, signal.SIG_IGN)

            def get_file():
                try:
                    import sage.misc.sageinspect
                    return " File: " + eval('getdoc(O)', {'getdoc': sage.misc.sageinspect.sage_getfile, 'O': O}) + "\n"
                except Exception as err:
                    return "Unable to read source filename (%s)" % err

            if get_help:
                import sage.misc.sageinspect
                result = get_file()
                try:
                    def our_getdoc(s):
                        # Build "name(arg, ..., kw=default, *args, **kwds)".
                        try:
                            x = sage.misc.sageinspect.sage_getargspec(s)
                            defaults = list(x.defaults) if x.defaults else []
                            args = list(x.args) if x.args else []
                            parts = []
                            if x.keywords:
                                parts.insert(0, '**kwds')
                            if x.varargs:
                                parts.insert(0, '*args')
                            # Defaults belong to the trailing arguments.
                            while defaults:
                                d = defaults.pop()
                                k = args.pop()
                                parts.insert(0, '%s=%r' % (k, d))
                            parts = args + parts
                            t = u" Signature : %s(%s)\n" % (obj, ', '.join(parts))
                        except Exception:
                            t = u""
                        try:
                            t += u" Docstring :\n%s" % sage.misc.sageinspect.sage_getdoc(s).decode('utf-8').strip()
                        except Exception:
                            # issue 1780: 'ascii' codec can't decode byte 0xc3 in position 3719: ordinal not in range(128)
                            pass
                        return t
                    result += eval('getdoc(O)', {'getdoc': our_getdoc, 'O': O})
                except Exception as err:
                    result += "Unable to read docstring (%s)" % err
                # Drop the leading indent in front of every line.
                result = result.lstrip().replace('\n ', '\n')

            elif get_source:
                import sage.misc.sageinspect
                result = get_file()
                try:
                    result += " Source:\n " + eval('getsource(O)', {'getsource': sage.misc.sageinspect.sage_getsource, 'O': O})
                except Exception as err:
                    result += "Unable to read source code (%s)" % err

            elif get_completions:
                if O is not None:
                    v = dir(O)
                    if hasattr(O, 'trait_names'):
                        v += O.trait_names()
                    if not target.startswith('_'):
                        v = [x for x in v if x and not x.startswith('_')]
                    # this case excludes abc = ...;for a in ab[tab]
                    if '*' in expr and '* ' not in expr:
                        try:
                            pattern = target.replace("*", ".*").replace("?", ".")
                            reg = re.compile(pattern + "$")
                            v = [x for x in v if reg.match(x)]
                        except Exception:
                            # Best effort: keep the unfiltered attribute list.
                            pass
                    else:
                        j = len(target)
                        v = [x[j:] for x in v if x.startswith(target)]
                else:
                    v = []

        if get_completions:
            # Unique completions, sorted case-insensitively.
            result = sorted(set(v), key=lambda x: x.lower())

    except Exception:
        traceback.print_exc()
        result = []

    # Failure is reported through an empty result, not through status.
    status = 'ok'
    return {'result': result, 'target': target, 'expr': expr, 'status': status, 'get_help': get_help, 'get_completions': get_completions, 'get_source': get_source}
531