class Tree():
"""
The last tree class you will ever need. Currently under development.
"""
def __init__ (self, treeString):
"""
Create an internal representation of a tree by passing in a text version of it.
Formatting rules apply: trees must be passed to this method in the form
A (B A (D E))
In other words, parentheses enclose a node's children, and sister nodes are
separated by whitespace. Parentheses were chosen over brackets to avoid any
confusion when entering HNF trees, which contain many bracketed symbols.
Any symbols can be used in node labels EXCEPT parentheses, commas, and
percent signs (%).
Calling this method automatically sets the recursive-style filler-role bindings
for each node, as well as the span-style filler-role bindings for each node.
"""
self.treeList = self.treeStringToList(treeString)
self.recursiveFRbindings = []
self.setRecursiveRoles(self.treeList, 'r')
self.spanFRbindings = []
self.setSpanRoles(self.treeList)
def treeStringToList(self, inputString):
"""
Check formatting of the input string; if it is valid, prepare it for internal
operations.
TO DO: check input string, most likely using regular expressions
- number of '(' should match number of ')'
- - len(findall(...))
- first character should be something other than ( or )
- no stray characters after final )
"""
niceList = inputString.replace(',','').split()
i = 0
while i < len(niceList):
currentSymbol = niceList[i]
if len(currentSymbol) > 1:
if currentSymbol[0] == '(' or currentSymbol[0] == ')':
niceList[i] = currentSymbol[1:]
niceList.insert(i, currentSymbol[0])
i += 1
currentSymbol = niceList[i]
if currentSymbol[-1] == '(' or currentSymbol[-1] == ')':
niceList[i] = currentSymbol[:-1]
niceList.insert(i+1, currentSymbol[-1])
i -= 1
i += 1
i = 0
while i < len(niceList) - 1:
currentSymbol = niceList[i]
nextSymbol = niceList[i+1]
if (currentSymbol != ')' and currentSymbol != '(' and nextSymbol != '(' and nextSymbol != ')') or \
(currentSymbol == ')' and nextSymbol != '(' and nextSymbol != ')'):
niceList.insert(i+1, ',')
i += 1
i += 1
return niceList
def setRecursiveRoles(self, treeNodes, level):
"""
Recursive function to populate recursiveFRbindings.
"""
if len(treeNodes) > 0:
if treeNodes[0] == '(':
level = str(0) + level
elif treeNodes[0] == ',':
newLevel = str(int(level[0]) + 1) + level[1:]
level = newLevel
elif treeNodes[0] == ')':
level = level[1:]
else:
self.recursiveFRbindings.append((treeNodes[0], level))
self.setRecursiveRoles(treeNodes[1:], level)
else :
needSwapped = True
while needSwapped:
needSwapped = False
for i in range(len(self.recursiveFRbindings) - 1):
if len(self.recursiveFRbindings[i][1]) > len(self.recursiveFRbindings[i+1][1]):
temp = self.recursiveFRbindings[i]
self.recursiveFRbindings[i] = self.recursiveFRbindings[i+1]
self.recursiveFRbindings[i+1] = temp
needSwapped = True
def setSpanRoles(self, treeList):
"""
A dumb, iterative function to populate spanFRbindings. Once again, probably
bad programming practice to populate a variable like this instead of returning
a value.
A nice recursive algorithm to do this task is certainly possible and would
be far more elegant, but it turned out not to be as straighforward as I initially
thought... hence the inefficient iterative algorithm here. Luckily, this
shouldn't be a problem because most users will be looking at very small
trees.
"""
subTrees = self.findSubTrees(treeList)
spanBase = 0
spanDict = {}
for i in range(len(subTrees)):
if len(subTrees[i][1]) == 0:
spanDict[subTrees[i][0]] = str(spanBase) + str(spanBase+1)
spanBase += 1
done = False
while not done:
for i in range(len(subTrees)):
if len(subTrees[i][1]) != 0:
span = ''
invalidSpan = False
for child in subTrees[i][1]:
if child in spanDict:
childSpan = spanDict[child]
span = span + childSpan[0] + childSpan[-1]
else:
invalidSpan = True;
if not invalidSpan:
span = ''.join(sorted(set(span), key=span.index))
spanDict[subTrees[i][0]] = span
if len(spanDict) == len(subTrees):
done = True
tempBindings = len(spanDict) * [None]
for key in spanDict.keys():
splitKey = key.split('%')
tempBindings[int(splitKey[1])] = (splitKey[0], spanDict[key]);
self.spanFRbindings = tempBindings
def findSubTrees(self, treeList):
"""
Given a tree in list form, find the immediate children of each node.
"""
idNo = 0
for i in range(len(treeList) - 1):
if treeList[i] != '(' and treeList[i] != ',' and treeList[i] != ')':
treeList[i] += '%' + str(idNo)
idNo += 1
subTrees = []
for i in range(len(treeList) - 1):
if treeList[i] != '(' and treeList[i] != ',' and treeList[i] != ')':
currentChildren = []
j = i + 1
hasChildren = (treeList[j] == '(')
if hasChildren:
embedded = -1
j += 1
while j < len(treeList) and embedded != 0:
if treeList[j] == ')':
embedded += 1;
elif treeList[j] == '(':
embedded -= 1;
elif treeList[j] != ',' and embedded == -1:
currentChildren.append(treeList[j] )
j += 1
subTrees.append((treeList[i], currentChildren))
return subTrees
def getFRbindings(self):
return self.recursiveFRbindings
def recursiveFRtoString(self):
"""
Gets a pretty string for the recursive-style filler-role bindings.
"""
returnString = '{'
for i in range(len(self.recursiveFRbindings)):
if i != 0:
returnString += ' '
returnString += self.recursiveFRbindings[i][0] + '/' + self.recursiveFRbindings[i][1]
if i != (len(self.recursiveFRbindings) - 1) :
returnString += '; \n'
returnString += '}\n'
return returnString
def spanFRtoString(self):
"""
Gets a pretty string for the span-style filler-role bindings.
"""
returnString = '{'
for i in range(len(self.spanFRbindings)):
if i != 0:
returnString += ' '
returnString += self.spanFRbindings[i][0] + '/' + self.spanFRbindings[i][1]
if i != (len(self.spanFRbindings) - 1) :
returnString += '; \n'
returnString += '}\n'
return returnString