Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupport News AboutSign UpSign In
| Download
Path: tree.py
Views: 104
1
# -*- coding: utf-8 -*-
2
3
# will be needed in the future for error checking:
4
#import re
5
#import sys
6
7
class Tree():
8
"""
9
The last tree class you will ever need. Currently under development.
10
"""
11
12
# -------------------------------------------------------------------------
13
# Constructor and supporting functions
14
15
def __init__ (self, treeString):
16
"""
17
Create an internal representation of a tree by passing in a text version of it.
18
Formatting rules apply: trees must be passed to this method in the form
19
A (B A (D E))
20
21
In other words, parentheses enclose a node's children, and sister nodes are
22
separated by whitespace. Parentheses were chosen over brackets to avoid any
23
confusion when entering HNF trees, which contain many bracketed symbols.
24
25
Any symbols can be used in node labels EXCEPT parentheses, commas, and
26
percent signs (%).
27
28
Calling this method automatically sets the recursive-style filler-role bindings
29
for each node, as well as the span-style filler-role bindings for each node.
30
"""
31
self.treeList = self.treeStringToList(treeString)
32
33
self.recursiveFRbindings = []
34
self.setRecursiveRoles(self.treeList, 'r')
35
36
self.spanFRbindings = []
37
self.setSpanRoles(self.treeList)
38
39
def treeStringToList(self, inputString):
40
"""
41
Check formatting of the input string; if it is valid, prepare it for internal
42
operations.
43
44
TO DO: check input string, most likely using regular expressions
45
- number of '(' should match number of ')'
46
- - len(findall(...))
47
- first character should be something other than ( or )
48
- no stray characters after final )
49
"""
50
51
niceList = inputString.replace(',','').split()
52
53
# clean up input such that all parentheses are separate from symbols
54
i = 0
55
while i < len(niceList):
56
currentSymbol = niceList[i]
57
if len(currentSymbol) > 1:
58
if currentSymbol[0] == '(' or currentSymbol[0] == ')':
59
niceList[i] = currentSymbol[1:]
60
niceList.insert(i, currentSymbol[0])
61
i += 1
62
currentSymbol = niceList[i]
63
if currentSymbol[-1] == '(' or currentSymbol[-1] == ')':
64
niceList[i] = currentSymbol[:-1]
65
niceList.insert(i+1, currentSymbol[-1])
66
i -= 1
67
i += 1
68
69
# insert commas between sibling nodes, if need be.
70
# this is currently required for the algorithm used to set recursive roles
71
i = 0
72
while i < len(niceList) - 1:
73
currentSymbol = niceList[i]
74
nextSymbol = niceList[i+1]
75
if (currentSymbol != ')' and currentSymbol != '(' and nextSymbol != '(' and nextSymbol != ')') or \
76
(currentSymbol == ')' and nextSymbol != '(' and nextSymbol != ')'):
77
# insert a comma
78
niceList.insert(i+1, ',')
79
i += 1
80
i += 1
81
82
return niceList
83
84
def setRecursiveRoles(self, treeNodes, level):
85
"""
86
Recursive function to populate recursiveFRbindings.
87
"""
88
if len(treeNodes) > 0:
89
#if len(self.recursiveFRbindings) == 0:
90
#self.recursiveFRbindings.append((treeNodes[0], 'r'))
91
if treeNodes[0] == '(':
92
#level += str(0)
93
level = str(0) + level
94
elif treeNodes[0] == ',':
95
#newLevel = level[0:-1] + str(int(level[-1:]) + 1)
96
newLevel = str(int(level[0]) + 1) + level[1:]
97
level = newLevel
98
elif treeNodes[0] == ')':
99
#level = level[0:len(level)-1]
100
level = level[1:]
101
else:
102
self.recursiveFRbindings.append((treeNodes[0], level))
103
104
self.setRecursiveRoles(treeNodes[1:], level)
105
106
else :
107
# all the recursive FR bindings are in place now, so let's reorder them
108
needSwapped = True
109
while needSwapped:
110
needSwapped = False
111
for i in range(len(self.recursiveFRbindings) - 1):
112
if len(self.recursiveFRbindings[i][1]) > len(self.recursiveFRbindings[i+1][1]):
113
# swap
114
temp = self.recursiveFRbindings[i]
115
self.recursiveFRbindings[i] = self.recursiveFRbindings[i+1]
116
self.recursiveFRbindings[i+1] = temp
117
needSwapped = True
118
119
def setSpanRoles(self, treeList):
120
"""
121
A dumb, iterative function to populate spanFRbindings. Once again, probably
122
bad programming practice to populate a variable like this instead of returning
123
a value.
124
125
A nice recursive algorithm to do this task is certainly possible and would
126
be far more elegant, but it turned out not to be as straighforward as I initially
127
thought... hence the inefficient iterative algorithm here. Luckily, this
128
shouldn't be a problem because most users will be looking at very small
129
trees.
130
"""
131
subTrees = self.findSubTrees(treeList)
132
133
# manually set spans of leaf nodes. this would be the base case in the recursive version.
134
spanBase = 0
135
spanDict = {}
136
for i in range(len(subTrees)):
137
if len(subTrees[i][1]) == 0:
138
spanDict[subTrees[i][0]] = str(spanBase) + str(spanBase+1)
139
spanBase += 1
140
141
# iteratively find the rest of the spans
142
done = False
143
while not done:
144
for i in range(len(subTrees)):
145
if len(subTrees[i][1]) != 0:
146
span = ''
147
invalidSpan = False
148
for child in subTrees[i][1]:
149
if child in spanDict:
150
childSpan = spanDict[child]
151
span = span + childSpan[0] + childSpan[-1]
152
else:
153
invalidSpan = True; # not all children were there
154
155
if not invalidSpan:
156
span = ''.join(sorted(set(span), key=span.index))
157
spanDict[subTrees[i][0]] = span
158
159
if len(spanDict) == len(subTrees):
160
done = True
161
162
tempBindings = len(spanDict) * [None]
163
for key in spanDict.keys():
164
splitKey = key.split('%')
165
tempBindings[int(splitKey[1])] = (splitKey[0], spanDict[key]);
166
167
self.spanFRbindings = tempBindings
168
169
def findSubTrees(self, treeList):
170
"""
171
Given a tree in list form, find the immediate children of each node.
172
"""
173
idNo = 0 # unique identifer for each node
174
for i in range(len(treeList) - 1):
175
if treeList[i] != '(' and treeList[i] != ',' and treeList[i] != ')':
176
treeList[i] += '%' + str(idNo) # since users might want to use periods in trees
177
idNo += 1
178
179
subTrees = []
180
for i in range(len(treeList) - 1):
181
if treeList[i] != '(' and treeList[i] != ',' and treeList[i] != ')':
182
currentChildren = []
183
# peek at next node; if it's '[', then this node has children and we need to collect them
184
j = i + 1
185
hasChildren = (treeList[j] == '(')
186
if hasChildren:
187
embedded = -1
188
j += 1
189
while j < len(treeList) and embedded != 0:
190
if treeList[j] == ')':
191
embedded += 1;
192
elif treeList[j] == '(':
193
embedded -= 1;
194
elif treeList[j] != ',' and embedded == -1:
195
currentChildren.append(treeList[j] )
196
j += 1
197
subTrees.append((treeList[i], currentChildren))
198
199
return subTrees
200
201
202
# -------------------------------------------------------------------------
203
# Setters and supporting functions
204
205
def getFRbindings(self):
206
return self.recursiveFRbindings
207
208
209
# -------------------------------------------------------------------------
210
# Pretty 'toString' methods
211
212
def recursiveFRtoString(self):
213
"""
214
Gets a pretty string for the recursive-style filler-role bindings.
215
"""
216
returnString = '{'
217
for i in range(len(self.recursiveFRbindings)):
218
if i != 0:
219
returnString += ' '
220
returnString += self.recursiveFRbindings[i][0] + '/' + self.recursiveFRbindings[i][1]
221
if i != (len(self.recursiveFRbindings) - 1) :
222
returnString += '; \n'
223
returnString += '}\n'
224
225
return returnString
226
227
def spanFRtoString(self):
228
"""
229
Gets a pretty string for the span-style filler-role bindings.
230
"""
231
returnString = '{'
232
for i in range(len(self.spanFRbindings)):
233
if i != 0:
234
returnString += ' '
235
returnString += self.spanFRbindings[i][0] + '/' + self.spanFRbindings[i][1]
236
if i != (len(self.spanFRbindings) - 1) :
237
returnString += '; \n'
238
returnString += '}\n'
239
240
return returnString
241
242