CoCalc -- sage_parsing.py

Path: cocalc/src / smc_sagews / smc_sagews / sage_parsing.py
Views: ³⁹⁵⁹⁸
1
"""
2
sage_parsing.py
3

4
Code for parsing Sage code blocks sensibly.
5
"""
6

7
#########################################################################################
8
#       Copyright (C) 2016, Sagemath Inc.
9
#                                                                                       #
10
#  Distributed under the terms of the GNU General Public License (GPL), version 2+      #
11
#                                                                                       #
12
#                  http://www.gnu.org/licenses/                                         #
13
#########################################################################################
14

15
import string
16
import traceback
17

18
def get_input(prompt):
    """
    Read one (possibly multi-line) chunk of input from the terminal.

    The first line is read with ``prompt``.  If it ends with a colon
    (ignoring trailing whitespace), continuation lines are read with a
    ``'...       '`` prompt until an empty line or end-of-file is seen;
    each continuation line is appended after a newline and four spaces.

    Returns the collected text, or None if end-of-file is hit while reading
    the first line.
    """
    try:
        text = raw_input(prompt)
    except EOFError:
        return None

    # Only a line that opens a block (trailing ':') triggers continuation.
    if not text.rstrip().endswith(':'):
        return text

    while True:
        try:
            more = raw_input('...       ')
        except EOFError:
            # End of input terminates the block just like an empty line.
            break
        if more == '':
            break
        text += '\n    ' + more
    return text
36

37
#def strip_leading_prompts(code, prompts=['sage:', '....:', '...:', '>>>', '...']):
38
#    code, literals, state = strip_string_literals(code)
39
#    code2 = []
40
#    for line in code.splitlines():
41
#        line2 = line.lstrip()
42
#        for p in prompts:
43
#            if line2.startswith(p):
44
#                line2 = line2[len(p):]
45
#                if p[0] != '.':
46
#                    line2 = line2.lstrip()
47
#                break
48
#        code2.append(line2)
49
#    code = ('\n'.join(code2))%literals
50
#    return code
51

52
def preparse_code(code):
    """
    Return ``code`` run through the Sage preparser, with leading prompts ignored.

    Sage is imported lazily here, so importing this module does not itself
    import ``sage.all_cmdline``.
    """
    from sage.all_cmdline import preparse
    return preparse(code, ignore_prompts=True)
55

56
def strip_string_literals(code, state=None):
    """
    Replace every string literal and ``#`` comment in ``code`` by a
    ``%(Ln)s`` placeholder.

    INPUT:

    - code -- a string of source code.

    - state -- None, or an ``(in_quote, raw)`` pair as returned by a previous
      call, used to continue scanning inside a literal that was still open.

    OUTPUT:

    A triple ``(new_code, literals, state)``:

    - new_code -- ``code`` with literals/comments replaced by placeholders and
      every other ``%`` doubled, so that ``new_code % literals`` gives back the
      original text.

    - literals -- dict mapping the labels ``'L1'``, ``'L2'``, ... to the removed text.

    - state -- ``(in_quote, raw)``; ``in_quote`` is False, or the quote
      delimiter of a literal still open at the end of ``code``.
    """
    if state is None:
        in_quote, raw = False, False
    else:
        in_quote, raw = state

    pieces = []
    literals = {}

    def keep(text):
        # Ordinary code: escape % so the result can be used as a format template.
        pieces.append(text.replace('%', '%%'))

    def stash(text):
        # Store a literal/comment under the next label and emit its placeholder.
        label = "L%s" % (len(literals) + 1)
        literals[label] = text
        pieces.append("%%(%s)s" % label)

    start = pos = 0
    while True:
        single = code.find("'", pos)
        double = code.find('"', pos)
        hash_pos = code.find('#', pos)

        # Position of the nearest quote character, or -1 if there is none.
        if single == -1 or double == -1:
            pos = max(single, double)
        else:
            pos = min(single, double)

        if not in_quote and hash_pos != -1 and (pos == -1 or hash_pos < pos):
            # A comment starts before any quote: it runs to the end of the line.
            eol = code.find('\n', hash_pos)
            if eol == -1:
                eol = len(code)
            keep(code[start:hash_pos])
            stash(code[hash_pos:eol])
            start = pos = eol
            continue

        if pos == -1:
            # No more quotes: flush the tail (as a literal if one is still open).
            if in_quote:
                stash(code[start:])
            else:
                keep(code[start:])
            break

        if in_quote:
            # A quote preceded by an odd number of backslashes is escaped; skip it.
            # (The raw flag is recorded but not consulted here.)
            if code[pos-1] == '\\':
                run = 2
                while code[pos-run] == '\\':
                    run += 1
                if run % 2 == 0:
                    pos += 1
            width = len(in_quote)
            if code[pos:pos+width] == in_quote:
                # Closing delimiter found: the literal spans start .. pos+width.
                stash(code[start:pos+width])
                pos += width
                start = pos
                in_quote = False
            else:
                pos += 1
            continue

        # Opening quote: note an r/R prefix and whether it is triple-quoted.
        raw = pos > 0 and code[pos-1] in 'rR'
        if len(code) >= pos+3 and (code[pos+1] == code[pos] == code[pos+2]):
            in_quote = code[pos]*3
        else:
            in_quote = code[pos]
        keep(code[start:pos])
        start = pos
        pos += len(in_quote)

    return "".join(pieces), literals, (in_quote, raw)
119

120
def end_of_expr(s):
    """
    Locate the end of the expression that begins at the start of s.

    s is code from which string literals have already been stripped.  The
    expression ends at the first space or tab that is seen while the running
    counts of round parentheses and of square brackets are both zero.

    Returns the index just past the expression (that is, the index of the
    delimiting space/tab), or len(s) if no such delimiter exists.
    """
    round_depth = 0
    square_depth = 0
    for pos, ch in enumerate(s):
        if ch == '(':
            round_depth += 1
        elif ch == ')':
            round_depth -= 1
        elif ch == '[':
            square_depth += 1
        elif ch == ']':
            square_depth -= 1
        elif ch in ' \t' and round_depth == 0 and square_depth == 0:
            return pos
    return len(s)
145

146
# NOTE/TODO: The dec_args dict will leak memory over time.  However, it only
# contains code that was entered, so it should never get big.  It
# seems impossible to know for sure whether a bit of code will be
# eventually needed later, so this leakiness seems necessary.
dec_counter = 0
dec_args = {}

# Divide the input code (a string) into blocks of code.
def divide_into_blocks(code):
    """
    Split ``code`` into a list of separately executable blocks.

    Processing steps:

    1. String literals and comments are replaced by placeholders (see
       strip_string_literals) so they cannot confuse the parsing below.
    2. Lines starting with ``!`` are turned into ``%sh`` lines, and lines
       starting with ``%`` (decorators) are rewritten into a call to
       ``salvus.execute_with_code_decorators``; the decorator and the code it
       applies to are stored in the module-level ``dec_args`` under the current
       ``dec_counter``, which is then incremented.  A decorator at the very
       start of a line with nothing (or only a comment) after it captures all
       the remaining lines.
    3. Comment-only and blank lines are dropped.
    4. Working from the bottom, lines are grouped so that indented lines and
       lines that close a still-unbalanced ``(``, ``[`` or ``{`` stay attached
       to the line above them.
    5. Adjacent blocks that only make sense together are merged:
       ``try``/``except``/``finally``, decorators with the ``def`` or ``class``
       they decorate, and ``else``/``elif`` with the preceding compound
       statement.

    INPUT:

    - code -- a string of source code.

    OUTPUT:

    A list of ``[start, stop, text]`` lists, in source order, where ``text`` is
    the block with literals restored, and ``start``/``stop`` are the indices of
    the block's first and last line in the list of lines that remain after
    step 3.
    """
    global dec_counter

    # strip string literals from the input, so that we can parse it without having to worry about strings
    code, literals, state = strip_string_literals(code)

    # divide the code up into lines.
    code = code.splitlines()

    # Compute the line-level code decorators.
    c = list(code)  # untouched copy to fall back on if decorator handling fails
    try:
        v = []
        for line in code:
            done = False

            # Transform shell escape into sh decorator.
            if line.lstrip().startswith('!'):
                line = line.replace('!', "%%sh ", 1)

            # Check for cell decorator
            # NOTE: strip_string_literals maps % to %%, because %foo is used for python string templating.
            if line.lstrip().startswith('%%'):
                i = line.find("%")
                j = end_of_expr(line[i+2:]) + i+2  + 1 # +1 for the space or tab delimiter
                expr = line[j:]%literals
                # Special case -- if % starts line *and* expr is empty (or a comment),
                # then code decorators impacts the rest of the code.
                sexpr = expr.strip()
                if i == 0 and (len(sexpr) == 0 or sexpr.startswith('#')):
                    expr = ('\n'.join(code[len(v)+1:]))%literals
                    done = True
                # Otherwise expr is nonempty -- code decorator only impacts this line.
                # Either way the line itself becomes the same dispatch call.
                new_line = '%ssalvus.execute_with_code_decorators(*_salvus_parsing.dec_args[%s])'%(line[:i], dec_counter)

                dec_args[dec_counter] = ([line[i+2:j]%literals], expr)
                dec_counter += 1
            else:
                new_line = line
            v.append(new_line)
            if done:
                break
        code = v
    except Exception:
        # Best effort: if anything goes wrong, continue with the lines as they were.
        code = c

    ## Tested this: Completely disable block parsing:
    ## but it requires the caller to do "exec compile(block+'\n', '', 'exec') in namespace, locals", which means no display hook,
    ## so "2+2" breaks.
    ## return [[0,len(code)-1,('\n'.join(code))%literals]]

    # Remove comment lines -- otherwise could get empty blocks that can't be exec'd.
    # For example, exec compile('#', '', 'single') is a syntax error.
    # Also, comments will confuse the code to break into blocks before.
    comment_placeholders = set(u"%%(%s)s" % label
                               for label, text in literals.items()
                               if text.startswith('#'))
    code = [x for x in code if x.strip() not in comment_placeholders]

    # take only non-whitespace lines now for Python code (string literals have already been removed).
    code = [x for x in code if x.strip()]

    # Compute the blocks, scanning upward from the last line.
    i = len(code)-1
    blocks = []
    while i >= 0:
        stop = i
        paren_depth = code[i].count('(') - code[i].count(')')
        brack_depth = code[i].count('[') - code[i].count(']')
        curly_depth = code[i].count('{') - code[i].count('}')
        # Keep moving up while the current line is indented, or while some
        # closing bracket seen so far has not yet met its opening bracket.
        while i>=0 and ((len(code[i]) > 0 and (code[i][0] in string.whitespace)) or paren_depth < 0 or brack_depth < 0 or curly_depth < 0):
            i -= 1
            if i >= 0:
                paren_depth += code[i].count('(') - code[i].count(')')
                brack_depth += code[i].count('[') - code[i].count(']')
                curly_depth += code[i].count('{') - code[i].count('}')
        block = ('\n'.join(code[i:]))%literals
        bs = block.strip()
        if bs: # has to not be only whitespace
            blocks.insert(0, [i, stop, bs])
        code = code[:i]
        i = len(code)-1

    # merge try/except/finally/decorator/else/elif blocks
    i = 1
    def merge():
        "Merge block i-1 with block i."
        blocks[i-1][-1] += '\n' + blocks[i][-1]
        blocks[i-1][1] = blocks[i][1]
        del blocks[i]

    while i < len(blocks):
        s = blocks[i][-1].lstrip()
        prev = blocks[i-1][-1].lstrip()

        # finally/except lines after a try
        if s.startswith(('finally', 'except')) and prev.startswith('try'):
            merge()

        # decorated definitions: a block whose last line is a decorator must be
        # joined with the def/class (or further decorator) that follows it,
        # since a lone "@decorator" line is not executable by itself.
        elif s.startswith(('def', 'class', '@')) and blocks[i-1][-1].splitlines()[-1].lstrip().startswith('@'):
            merge()

        # lines starting with else conditions (if *and* for *and* while!)
        elif s.startswith('else') and prev.startswith(('if', 'while', 'for', 'try', 'elif')):
            merge()

        # lines starting with elif
        elif s.startswith('elif') and prev.startswith('if'):
            merge()

        # do not merge blocks -- move on to next one
        else:
            i += 1

    return blocks
272

273

274

275

276
############################################
277

278
# CHARS0: characters that may occur in an identifier.
# CHARS:  the same, plus '.', i.e. characters that may occur in a dotted name.
CHARS0 = string.ascii_letters + string.digits + '_'
CHARS  = CHARS0 + '.'
def guess_last_expression(obj):  # TODO: bad guess -- need to use a parser to go any further.
    """
    Return the longest suffix of obj that consists only of characters in
    CHARS (letters, digits, underscore and dot).  The result is the empty
    string when obj is empty or its last character is not in CHARS.
    """
    cut = len(obj)
    # Walk left from the end while the preceding character is still allowed.
    while cut > 0 and obj[cut-1] in CHARS:
        cut -= 1
    return obj[cut:]
285

286
def is_valid_identifier(target):
    """
    Return True if target is non-empty, consists only of characters in CHARS0
    (ASCII letters, digits, underscore) and starts with a letter or underscore;
    otherwise return False.
    """
    if len(target) == 0:
        return False
    if any(ch not in CHARS0 for ch in target):
        return False
    # A leading digit is the only remaining way to be invalid.
    return target[0] in string.ascii_letters + '_'
294

295

296

297
# Names offered for completion in addition to the user's namespace: every
# builtin name plus the language keywords.
# Keywords from http://docs.python.org/release/2.7.2/reference/lexical_analysis.html
# NOTE: __builtins__ is a dict when this file is imported as a module, but it is
# the builtins module object when the file is run as __main__, so handle both.
_builtin_completions = list(__builtins__ if isinstance(__builtins__, dict) else vars(__builtins__)) + ['and', 'del', 'from', 'not', 'while', 'as', 'elif', 'global', 'or', 'with', 'assert', 'else', 'if', 'pass', 'yield', 'break', 'except', 'import', 'print', 'class', 'exec', 'in', 'raise', 'continue', 'finally', 'is', 'return', 'def', 'for', 'lambda', 'try']
299

300
def introspect(code, namespace, preparse=True):
    """
    Compute tab completions, a docstring, or source code for the last
    expression in ``code``.

    What is computed depends on how the last expression ends:

    - ``??``        -- source code of the object before it,
    - ``?`` or ``(`` -- file, signature and docstring of the object before it,
    - anything else -- completions of the trailing (possibly dotted) name.

    WARNING: unless the request is a plain name completion, the text before
    the last expression is exec'd in ``namespace`` and the object expression is
    eval'd, so ``namespace`` can be modified and arbitrary user code can run.
    Evaluation is limited to about one second via SIGALRM; on timeout the
    handler raises KeyboardInterrupt, which is not an ``Exception`` and so can
    propagate to the caller.

    INPUT:

    - code -- a string containing Sage (if preparse=True) or Python code.

    - namespace -- a dictionary to complete in (we also complete using
      builtins such as 'def', 'for', etc.)

    - preparse -- a boolean; if True, code is passed through preparse_code
      before it is exec'd or eval'd.

    OUTPUT:

    A dict: {'result':, 'target':, 'expr':, 'status':, 'get_help':, 'get_completions':, 'get_source':}
    """
    import re
    # result: the docstring, source code, or list of completions (at
    # return, it might thus be either a list or a string)
    result = []

    # expr: the part of code that is used to do the completion, e.g.,
    # for 'a = n.m.foo', expr would be 'n.m.foo'.  It can be more complicated,
    # e.g., for '(2+3).foo.bar' it would be '(2+3).foo'.
    expr   = ''

    # target: for completions, target is the part of the code that we
    # complete on in the namespace defined by the object right before
    # it, e.g., for n.m.foo, the target is "foo".  target is the empty
    # string for source code and docstrings.
    target = ''

    # When returning, exactly one of the following will be true:
    get_help        = False      # getting docstring of something
    get_source      = False      # getting source code of a function
    get_completions = True       # getting completions of an identifier in some namespace

    try:
        # v: list of candidate completions.  Initialized here so that it is
        # always bound, even if building the wildcard regexp below fails.
        v = []

        # Strip all strings from the code, replacing them by template
        # symbols; this makes parsing much easier.
        code0, literals, state = strip_string_literals(code.strip())  # we strip, since trailing space could cause confusion below

        # Move i so that it points to the start of the last expression in the code.
        # (TODO: this should probably be replaced by using ast on preparsed version.  Not easy.)
        i = max([code0.rfind(t) for t in '\n;='])+1
        while i<len(code0) and code0[i] in string.whitespace:
            i += 1

        # Break the line in two pieces: before_expr | expr; we may
        # need before_expr in order to evaluate and make sense of
        # expr.  We also put the string literals back in, so that
        # evaluation works.
        expr        = code0[i:]%literals
        before_expr = code0[:i]%literals

        chrs = set('.()[]? ')
        if not any(c in expr for c in chrs):
            # Easy case: this is just completion on a simple identifier in the namespace.
            get_help = False; get_completions = True; get_source = False
            target = expr
        else:
            # Now for all of the other harder cases.
            i = max([expr.rfind(s) for s in '?('])
            if i >= 1 and i == len(expr)-1 and expr[i-1] == '?':  # expr ends in two ?? -- source code
                get_source = True; get_completions = False; get_help = False
                target = ""
                obj    = expr[:i-1]
            elif i == len(expr)-1:    # ends in ( or ? (but not ??) -- docstring
                get_help = True; get_completions = False; get_source = False
                target = ""
                obj    = expr[:i]
            else:  # completions (not docstrings or source)
                get_help = False; get_completions = True; get_source = False
                i      = expr.rfind('.')
                target = expr[i+1:]
                if target == '' or is_valid_identifier(target) or '*' in expr and '* ' not in expr:
                    # this case includes list.*end[tab]
                    obj    = expr[:i]
                else:
                    # this case includes aaa=...;3 * aa[tab]
                    expr = guess_last_expression(target)
                    i = expr.rfind('.')
                    if i != -1:
                        target = expr[i+1:]
                        obj    = expr[:i]
                    else:
                        target = expr

        if get_completions and target == expr:
            # Completion of a plain name: nothing needs to be evaluated.
            j      = len(expr)
            if '*' in expr:
                # this case includes *_factors<TAB> and abc =...;3 * ab[tab]
                try:
                    pattern = expr.replace("*",".*").replace("?",".")
                    reg = re.compile(pattern+"$")
                    v = filter(reg.match, namespace.keys() + _builtin_completions)
                    # for 2*sq[tab]
                    if len(v) == 0:
                        gle = guess_last_expression(expr)
                        j = len(gle)
                        if j > 0:
                            target = gle
                            v = [x[j:] for x in (namespace.keys() + _builtin_completions) if x.startswith(gle)]
                except Exception:
                    # e.g. the wildcard expression is not a valid regexp; v keeps
                    # whatever value it had, so we simply offer no more completions.
                    pass
            else:
                v = [x[j:] for x in (namespace.keys() + _builtin_completions) if x.startswith(expr)]
                # for 2+sqr[tab]
                if len(v) == 0:
                    gle = guess_last_expression(expr)
                    j = len(gle)
                    if j > 0 and j < len(expr):
                        target = gle
                        v = [x[j:] for x in (namespace.keys() + _builtin_completions) if x.startswith(gle)]
        else:

            # We will try to evaluate
            # obj.  This is dangerous and a priori could take
            # forever, so we spend at most 1 second doing this --
            # if it takes longer a signal kills the evaluation.
            # Obviously, this could in fact lock if
            # non-interruptable code is called, which should be rare.

            O = None
            import signal
            def mysig(*args): raise KeyboardInterrupt
            # Remember the handler that was installed before, so it can be put
            # back afterwards instead of leaving SIGALRM permanently ignored.
            old_handler = signal.signal(signal.SIGALRM, mysig)
            signal.alarm(1)
            try:
                import sage.all_cmdline
                if before_expr.strip():
                    try:
                        exec (before_expr if not preparse else preparse_code(before_expr)) in namespace
                    except Exception:
                        pass
                        # uncomment for debugging only
                        # traceback.print_exc()
                # We first try to evaluate the part of the expression before the name
                try:
                    O = eval(obj if not preparse else preparse_code(obj), namespace)
                except (SyntaxError, TypeError, AttributeError):
                    # If that fails, we try on a subexpression.
                    # TODO: This will not be needed when
                    # this code is re-written to parse using an
                    # AST, instead of using this lame hack.
                    obj = guess_last_expression(obj)
                    try:
                        O = eval(obj if not preparse else preparse_code(obj), namespace)
                    except:
                        # Bare except: this also swallows the KeyboardInterrupt
                        # raised by the timeout handler; O then stays None.
                        pass
            finally:
                # Cancel a still-pending alarm and restore the previous handler.
                signal.alarm(0)
                if old_handler is None:
                    # The previous handler was not installed from Python and
                    # cannot be restored; ignore the signal instead.
                    old_handler = signal.SIG_IGN
                signal.signal(signal.SIGALRM, old_handler)

            def get_file():
                # One-line description of the file in which O is defined.
                try:
                    import sage.misc.sageinspect
                    return "   File: " + eval('getdoc(O)', {'getdoc':sage.misc.sageinspect.sage_getfile, 'O':O}) + "\n"
                except Exception as err:
                    return "Unable to read source filename (%s)"%err

            if get_help:
                import sage.misc.sageinspect
                result = get_file()
                try:
                    def our_getdoc(s):
                        # Build "Signature" and "Docstring" sections for s; either
                        # section is silently left out if it cannot be computed.
                        try:
                            x = sage.misc.sageinspect.sage_getargspec(s)
                            defaults = list(x.defaults) if x.defaults else []
                            args = list(x.args) if x.args else []
                            v = []
                            if x.keywords:
                                v.insert(0,'**kwds')
                            if x.varargs:
                                v.insert(0,'*args')
                            # Defaults belong to the last arguments, so pair them up from the right.
                            while defaults:
                                d = defaults.pop()
                                k = args.pop()
                                v.insert(0,'%s=%r'%(k,d))
                            v = args + v
                            t = u"   Signature : %s(%s)\n"%(obj, ', '.join(v))
                        except:
                            t = u""
                        try:
                            t += u"   Docstring :\n%s" % sage.misc.sageinspect.sage_getdoc(s).decode('utf-8').strip()
                        except Exception as ex:
                            # print ex  # issue 1780: 'ascii' codec can't decode byte 0xc3 in position 3719: ordinal not in range(128)
                            pass
                        return t
                    result += eval('getdoc(O)', {'getdoc':our_getdoc, 'O':O})
                except Exception as err:
                    result += "Unable to read docstring (%s)"%err
                result = result.lstrip().replace('\n   ','\n')  # Get rid of the 3 spaces in front of everything.

            elif get_source:
                import sage.misc.sageinspect
                result = get_file()
                try:
                    result += "   Source:\n   " + eval('getsource(O)', {'getsource':sage.misc.sageinspect.sage_getsource, 'O':O})
                except Exception as err:
                    result += "Unable to read source code (%s)"%err

            elif get_completions:
                if O is not None:
                    v = dir(O)
                    if hasattr(O, 'trait_names'):
                        v += O.trait_names()
                    # Hide private names unless the user started typing one.
                    if not target.startswith('_'):
                        v = [x for x in v if x and not x.startswith('_')]
                    # this case excludes abc = ...;for a in ab[tab]
                    if '*' in expr and '* ' not in expr:
                        try:
                            pattern = target.replace("*",".*").replace("?",".")
                            reg = re.compile(pattern+"$")
                            v = filter(reg.match, v)
                        except:
                            pass
                    else:
                        j = len(target)
                        v = [x[j:] for x in v if x.startswith(target)]
                else:
                    v = []

        if get_completions:
            # Unique completions, sorted case-insensitively.
            result = sorted(set(v), key=lambda x: x.lower())

    except Exception as msg:
        traceback.print_exc()
        result = []
        # NOTE(review): status is 'ok' even on failure -- confirm that callers
        # rely on this before changing it.
        status = 'ok'
    else:
        status = 'ok'
    return {'result':result, 'target':target, 'expr':expr, 'status':status, 'get_help':get_help, 'get_completions':get_completions, 'get_source':get_source}
531
Product

Resources

Company