CoCalc -- grammar.py

Views: ¹¹⁸
1
# -*- coding: utf-8 -*-
2
import os.path
3
import re
4
import sys
5
import numpy as np
6
from collections import OrderedDict
7
from itertools import product
8
from tree import Tree
9

10
class Grammar(object):
11
    """
12
    The last grammar class you will ever need. Currently under development.
13
    """
14

15
    INF = float('Inf')
16

17
    # -------------------------------------------------------------------------
18
    # Constructor and supporting functions
19

20
    def __init__(self, rules, isPcfg=False, isHnf=False):
21
        """
22
        Create a grammar object from a grammar specification.
23

24
        If grammar rules are being passed in a file, they must be written in the form
25
            A -> B C | D E
26
            F -> G H
27
        ... etc. Separate rules must be written on separate lines.
28

29
        If grammar rules are being passed as an argument, they must be written in the form
30
            "A -> B C | D E; F -> G H"
31
        ... etc. Separate rules must be separated by a semicolon (;).
32

33
        In both cases, rules with multiple right-hand sides can use a vertical bar ('|') to
34
        separate different right-hand sides, or every rule can just be written on a separate line.
35
        All sister symbols must be separated by a space (' '). The first node of the first
36
        rule will be treated as the root node internally. For now, all rules must be binary
37
        branching at most; this is enforced within the program.
38

39
        By default, this function accepts rules in CNF form and converts them to HNF.
40
        If you want to enter HNF rules using the standard bracketing practice, you must set the hnf
41
        parameter to True by passing this as a value to the function.
42

43
        Rules can be entered in HNF; if they are, the flag 'isHnf' must be set to 'True.'
44
        PCFG functionality is still under development. For now, best not to use this functionality.
45
        Some features (i.e. 'toString' methods) may not work if you do.
46

47
        This function is based on one originally written by Colin Wilson.
48
        """
49
        self.isPcfg = isPcfg
50

51
        listOfRules = []
52

53
        if os.path.isfile(rules):
54
            f = open(rules, 'r')
55
            for line in f:
56
                listOfRules.append(line)
57
            f.close()
58
        else:
59
            listOfRules = rules.split(';')
60

61
        # check that all rules approximately conform to prescribed format
62
        ruleForm = re.compile(r'.+->.+')
63
        for i in range(len(listOfRules)):
64
            listOfRules[i] = listOfRules[i].strip()
65
            if re.match(ruleForm, listOfRules[i]) == None:
66
                sys.exit('Error: input ' + listOfRules[i] + ' is not formatted correctly.')
67

68
        # this is a bit sloppy right now, but the order in which these lines are executed does matter;
69
        # there is a different form depending on whether we received HNF rules (and need to convert
70
        # to CNF) or received CNF (and need to convert to HNF)
71
        if not isHnf: # default
72
            self.cnfRules = self.createRules(listOfRules)
73
            self.hnfRules, self.branchSyms = self.cnfToHnf()
74
        else:
75
            self.hnfRules = self.createRules(listOfRules)
76
            self.cnfRules, self.branchSyms = self.hnfToCnf()
77

78
        if self.isPcfg:
79
            self.cnfRules, self.hnfRules = self.setRemainingProbabilities()
80

81
        self.hgRulesAreSet = False
82
        self.networkInfoIsSet = False
83
        self.allGridpointsAreSet = False
84
        self.zIsSet = False
85
        self.allHarmoniesAreSet = False
86

87
        # all of these will be set eventually; in the future, they might be "declared"
88
        # here (Java-style) to catch errors
89
        #self.hgWeights
90
        #self.hgBiases
91
        #self.roleNames
92
        #self.fillerNames
93
        #self.weightMatrix
94
        #self.biasVector
95
        #self.allFRbindings
96

97
    def createRules(self, listOfRules):
98
        """
99
        Create the internal representation of the CFG passed as parameter listOfRules.
100
        """
101
        maxN = 2 # binary branching
102

103
        fl = re.compile(r'^0*1?\.?\d+?$')
104

105
        rules = OrderedDict()
106
        for rule in listOfRules:
107
            splitRule = rule.split('->')        # split ruleInput into rhs and lhs
108
            lhs = splitRule[0].strip()          # rule[0] is the first piece (lhs)
109
            wholeRhs = splitRule[1].split('|')  # rule[1] is the second piece (rhs)
110

111
            for rhs in wholeRhs:
112
                rhsList = rhs.split()
113
                nChildren = len(rhsList)
114
                if self.isPcfg and re.match(fl, rhsList[0]) != None:
115
                    rhsList[0] = float(rhsList[0]) # this is a probability
116
                    nChildren -= 1
117
                if nChildren > maxN:
118
                    sys.exit('Seriously? Do you really need ' + str(len(rhsList))  + \
119
                        '-ary trees?')
120
                rules.setdefault(lhs, []).append(rhsList)
121

122
        return rules
123

124
    def cnfToHnf(self):
125
        """
126
        Convert rules in "Conventional Normal Form" to Harmonic Normal Form. (In other
127
        words, add intermediate bracketed symbols.) Must be called after self.cnfRules has
128
        been created.
129

130
        This function is based on one originally written by Colin Wilson.
131
        """
132
        hnfRules = OrderedDict()
133
        branchSyms = []
134

135
       # fl = re.compile(r'^0*1?\.?\d+?$')
136

137
        # for each left hand side
138
        b = re.compile(r'.*\[\d+\]')
139
        goodInput = True
140
        for lhs in self.cnfRules.keys():
141
            if re.match(b, lhs) != None:
142
                goodInput = False
143
            bracketIndex = 1
144
            for rhs in self.cnfRules[lhs]:
145
                # get degree of of rule (# of nonterminals)
146
                degree = len(rhs)
147
                # if there is more than one nonterminal
148
                if degree >= 1:
149
                    # create bracket symbol
150
                    newSym = lhs +'['+ str(bracketIndex) +']'
151
                    # add unique branching lhs to list
152
                    if not newSym in branchSyms:
153
                        branchSyms.append(newSym)
154

155
                    # add rules (lhs --> new and new --> rhs)
156
                    if self.isPcfg and isinstance(rhs[0], float):
157
                        hnfRules.setdefault(lhs, []).append([rhs[0], newSym])
158
                        hnfRules.setdefault(newSym, []).append(rhs[1:])
159
                    else:
160
                        hnfRules.setdefault(lhs, []).append([newSym])
161
                        hnfRules.setdefault(newSym, []).append(rhs[:])
162

163
                    #hnfRules.setdefault(newSym, []).append(rhs)
164
                    bracketIndex += 1
165

166
                # if it's a terminal, just add original rule
167
                else:
168
                    hnfRules[lhs].append(rhs)
169

170
        # issue warning if no bracketed symbols were found
171
        if not goodInput:
172
            sys.exit('Error: HNF-style bracketing detected. If you meant to enter your rules in HNF, ' + \
173
            'you must pass parameter \'True\' to setRules(); otherwise, you should avoid using HNF-style ' + \
174
            'bracketing structure in your nodes.')
175

176
        # return converted rules
177
        return hnfRules, branchSyms
178

179
    def hnfToCnf(self):
180
        """
181
        Convert rules in Harmonic Normal Form to "Conventional Normal Form." (In other
182
        words, remove intermediate bracketed symbols.) Must be called after self.hnfRules
183
        has been created.
184
        """
185
        cnfRules = OrderedDict()
186
        branchSyms = []
187

188
        # for each left hand side
189
        b = re.compile(r'.*\[\d+\]')
190
        goodInput = False
191
        for lhs in self.hnfRules.keys():
192
            if re.match(b, lhs) != None:
193
                branchSyms.append(lhs)
194
                newLhs = lhs.split('[')[0]
195
                for rhs in self.hnfRules[lhs]:
196
                    cnfRules.setdefault(newLhs, []).append(rhs[:])
197
                goodInput = True
198

199
        if not goodInput:
200
            sys.exit('Error: HNF not detected. If you did not intend to use HNF, do not pass parameter ' + \
201
            '\'True\' to setRules().')
202

203
        # return converted rules
204
        return cnfRules, branchSyms
205

206
    def hnfTreeToState(self, inputString):
        """
        Use grammar to convert a tree in HNF (input as text) to a state based on the current
        network settings.

        The tree is parsed with Tree and its filler/role bindings are read from
        getFRbindings(); each binding is indexed as (filler, role). The returned state is
        the concatenation, in self.roleNames order, of one one-hot vector over
        self.fillerNames per role.

        Exits (via sys.exit) if the tree uses an unknown filler or role, or if some role
        of the network is not bound in the tree.
        """
        if not self.networkInfoIsSet:
            self.setNetworkInfo()

        currentTree = Tree(inputString)
        frBindings = currentTree.getFRbindings()

        byRole = {}
        for binding in frBindings:
            if binding[0] not in self.fillerNames:
                sys.exit('Error: Invalid filler (' + binding[0] + ').')
            if binding[1] not in self.roleNames:
                sys.exit('Error: Invalid role (' + binding[1] + ').')
            byRole[binding[1]] = binding[0]

        state = []
        for role in self.roleNames:
            if role not in byRole:
                # needNullFiller records whether null padding was requested;
                # self.padWithNulls is a method and would always be truthy here
                if self.needNullFiller:
                    sys.exit('Error: Role ' + role + ' not in tree. Check that \n(a) you entered ' + \
                    'your tree in HNF, \n(b) null elements in the tree are explicitly represented with ' + \
                    'the null symbol (' + self.nullSymbol + '), and \n(c) your tree is licensed by the ' + \
                    'following grammar:\n\n' + self.hnfRulesToString())
                else:
                    sys.exit('Error: Role ' + role + ' not in tree. Check that \n(a) you entered ' + \
                    'your tree in HNF, and \n(b) your tree is licensed by the following grammar:\n\n' + \
                    self.hnfRulesToString())
            thisRole = [0] * len(self.fillerNames)
            thisRole[self.fillerNames.index(byRole[role])] = 1
            state += thisRole

        return np.transpose(np.array(state))
242

243
    def stateToString(self, state):
        """
        Convert a binary state vector to a string of 1s and 0s.

        The state is flattened first, so row vectors, column vectors and 1-D arrays all
        give the same string. Each entry is converted with int(), one character per unit;
        this does not go through numpy's printing, which abbreviates long arrays
        with '...'.
        """
        return ''.join(str(int(value)) for value in np.ravel(state))
254

255
    # -------------------------------------------------------------------------
256
    # Probability-related functions (these are only called in the case of a PCFG)
257

258
    def setRemainingProbabilities(self):
259
        """
260
        Fix rules such that if this is a PCFG, all rules that can expand to more than
261
        one right-hand side are given an equal probability to expand to each of those right-hand
262
        sides (asically, divide up remaining probability). Make sense?
263
        """
264
        rulesSet = [self.cnfRules, self.hnfRules]
265
        for rules in rulesSet:
266
            for lhs in rules.keys():
267
                wholeRhs = rules[lhs] # a list of right-hand sides (list of lists)
268
                currentProbValues = []
269
                noProbIndices = []
270
                probToDistribute = 0
271
                for rhs in wholeRhs:
272
                    if isinstance(rhs[0], float):
273
                        if rhs[0] < 0 or rhs[0] > 1:
274
                            sys.exit('Error: Invalid probability (' + str(rules[lhs][0]) + ').')
275
                        else:
276
                            currentProbValues.append(rhs[0])
277
                    else:
278
                        noProbIndices.append(wholeRhs.index(rhs))
279
                if len(noProbIndices) != 0:
280
                    probToDivide = 1 - sum(currentProbValues)
281
                    probToDistribute = probToDivide / len(noProbIndices)
282
                    for index in noProbIndices:
283
                        wholeRhs[index].insert(0, probToDistribute)
284
                if np.abs(1 - (len(noProbIndices) * probToDistribute + sum(currentProbValues))) > 0.02:
285
                    sys.exit('Error: Probabilities do not sum to 1 (check rules beginning with ' + lhs + ')')
286

287
        return rulesSet[0], rulesSet[1]
288

289
    def adjustBiases(self):
290
        """
291
        Adds values from self.biasAdjustments to self.biasVector to influence probability.
292
        Here, the probability difference is simply added to the most likely structure.
293
        """
294
        for adjustment in self.biasAdjustments:
295
            if adjustment[1] > 0 and adjustment[1] < self.INF:
296
                frBinding = adjustment[0][0] + '/' + adjustment[0][1]
297
                self.biasVector_byFiller[self.allFRbindings.index(frBinding)] += adjustment[1]
298
                self.biasVector_byRole[self.allFRbindings.index(frBinding)] += adjustment[1]
299
                #print('adjusted frBinding ' + frBinding)
300

301
    def computeProb(self, tree, T=1):
302
        """
303
        Compute probability of tree (input as text), using all gridpoints as the sample
304
        space and T = 1 by default.
305
        """
306
        if not self.zIsSet:
307
            self.computeZ()
308

309
        return np.exp(self.getHarmony(tree) / T) / self.z
310

311
    def computeZ(self, T=1):
312
        """
313
        Computes sum_i (exp(H(tree_i)/T)) for T = 1
314
        """
315
        if not self.allHarmoniesAreSet:
316
            self.setAllHarmonies()
317

318
        allHarmonies = np.array(list(self.allHarmonies.values()))
319

320
        self.z = np.exp(allHarmonies / T).sum()
321
        self.zIsSet = True
322

323

324
    # -------------------------------------------------------------------------
325
    # Setters and supporting functions
326

327
    def setAllGridpoints(self):
328
        """
329
        Generate all gridpoints and evaluate their Harmony.
330
        """
331
        if not self.networkInfoIsSet:
332
            self.setNetworkInfo()
333

334
        nR = len(self.roleNames)
335
        nF = len(self.fillerNames)
336

337
        # generate a tuple for every possible gridpoint, storing all of them in allGrids
338
        allPoints = []
339
        for i in range(len(self.fillerNames)):
340
            currentPoint = [0] * len(self.fillerNames)
341
            currentPoint[i] = 1
342
            allPoints.append(currentPoint)
343

344
        allGridsList = list(product(allPoints, repeat=len(self.roleNames)))
345

346
        allGridsMat = np.zeros(shape=(nR * nF, len(allGridsList)))
347
        for i in range(len(allGridsList)):
348
            # the following line creates a matrix from the current tuple, reshapes it into a
349
            # single row, then transposes it to a single colum that represents this gridpoint
350
            gridCol = np.transpose(np.reshape(np.array(allGridsList[i]), (nR * nF, 1)))
351
            allGridsMat[:,i] = gridCol
352

353
        self.allGridpoints = allGridsMat
354
        self.allGridpointsAreSet = True
355

356
    def setAllHarmonies(self):
357
        """
358
        Store the harmony for every gridpoint because, why not? This may take a while to run,
359
        but will only need to be run once per grammar.
360
        """
361
        if not self.allGridpointsAreSet:
362
            self.setAllGridpoints()
363

364
        nGrids = self.allGridpoints.shape[1]
365
        allHarmonies = {}
366
        for i in range(nGrids):
367
            stateKey = self.stateToString(self.allGridpoints[:,i])
368
            allHarmonies[stateKey] = self.getHarmony(self.allGridpoints[:,i])
369

370
        self.allHarmonies = allHarmonies
371
        self.allHarmoniesAreSet = True
372

373
    def setHarmonicGrammarRules(self, maxDepth=6, useHnf=True, addNullFillers=False, nullSymbol='_'):
374
        """
375
        Create Harmonic Grammar rules based on the CFG passed as parameter
376
        ruleDictionary.
377
        """
378
        self.maxDepth = maxDepth
379
        self.useHnf = useHnf
380
        self.needNullFiller = addNullFillers
381

382
        if self.useHnf:
383
            ruleSet = self.hnfRules
384
        else:
385
            ruleSet = self.cnfRules
386

387
        start = (self.getRootNode(ruleSet), 'r')
388

389
        if addNullFillers:
390
            self.nullSymbol = nullSymbol
391
            self.nullPaddedRules = self.padWithNulls(ruleSet, self.nullSymbol)
392
            ruleSet = self.nullPaddedRules
393

394
        hgWeights = []
395
        hgBiases = []
396
        self.biasAdjustments = []
397

398
        self.hgWeights, self.hgBiases = self.expandHGrules(start, ruleSet, hgWeights, hgBiases)
399
        self.hgWeights, self.hgBiases = self.sortHGrules(self.hgWeights, self.hgBiases)
400
        self.hgRulesAreSet = True
401

402
    def padWithNulls(self, ruleSet, nullSymbol):
403
        """
404
        "Symmetrizes" the CFG grammar in ruleSet by padding projections with null symbols.
405
        For example, given the grammar
406
            S -> A; S -> B B,
407
        this function creates
408
            S -> A _; S -> B B
409
        Note that after this function is run, all parent nodes have the same number of children.
410
        """
411
        # get max number of children (max 'n')
412
        maxN = 0
413
        for lhs in ruleSet.keys():
414
            for rhs in ruleSet[lhs]:
415
                currentN = len(rhs)
416
                if currentN > maxN:
417
                    maxN = currentN
418

419
        # fill each branching rule up to n children with fillers
420
        for lhs in ruleSet.keys():
421
            if lhs in self.branchSyms:
422
                for rhs in ruleSet[lhs]:
423
                    while len(rhs) < maxN:
424
                        rhs.append(self.nullSymbol)
425

426
        return ruleSet
427

428
    def expandHGrules(self, parent, ruleSet, hgWeights, hgBiases):
429
        """
430
        Recursive function to find Harmonic Grammar rules. Stops when all possible
431
        paths through the CFG are explored, or the maximum depth is reached.
432

433
        Right now, biases are added to roles only, not filler/role bindings.
434
        """
435
        if len(parent[1]) < self.maxDepth:
436
            if parent[0] in ruleSet.keys():
437
                for rhs in ruleSet[parent[0]]:
438
                    harmonyDiff = 0
439
                    if self.isPcfg:
440
                        temp = rhs[1:]
441
                        # this currently only works for binary-branching trees
442
                        harmonyDiff = np.log(rhs[0]) - np.log(1 - rhs[0])
443
                    else:
444
                        temp = rhs[:]
445
                    if parent[1] == 'r':
446
                        hgBiases.append([parent[0], parent[1], -(len(temp))])
447
                    else:
448
                        hgBiases.append([parent[0], parent[1], -(len(temp) + 1)])
449
                    childLevel = '0' + parent[1]
450
                    for childSymbol in temp:
451
                        # format for hgRules: [[(S, r), (S[1], 0r), 2], [...], ...]
452
                        self.biasAdjustments.append([[childSymbol, childLevel], harmonyDiff])
453
                        hgWeights.append([(parent[0], parent[1]), (childSymbol, childLevel), 2])
454
                        hgWeights, hgBiases = \
455
                            self.expandHGrules((childSymbol, childLevel), ruleSet, hgWeights, hgBiases)
456
                        childLevel = str(int(childLevel[0]) + 1) + childLevel[1:]
457
            else:
458
                # handles case where we are at a terminal node (at bottom of tree)
459
                hgBiases.append([parent[0], parent[1], -1])
460
        else:
461
            # handles case where max depth was reached at a non-terminal symbol (at bottom of tree)
462
            hgBiases.append([parent[0], parent[1], -1])
463

464
        return hgWeights, hgBiases
465

466
    def sortHGrules(self, hgWeights, hgBiases):
        """
        Order the HG rules by depth and drop duplicates.

        hgWeights entries have the form [(filler, role), (filler, role), weight] and are
        ordered by the length of the parent's role name; hgBiases entries have the form
        [filler, role, bias] and are ordered by the length of the role name. Both sorts
        are stable and performed in place. Of several weights with the same pair of
        bindings, or several biases with the same (filler, role), only the first is kept.

        Returns the de-duplicated weights and biases as two new lists.
        """
        # shallower roles (shorter names) first; list.sort keeps ties in original order
        hgWeights.sort(key=lambda weight: len(weight[0][1]))
        hgBiases.sort(key=lambda biasEntry: len(biasEntry[1]))

        # tuples are hashable and, unlike concatenated strings, cannot make two
        # different pairs of bindings look alike
        hgWeightsNoDuplicates = []
        seen = set()
        for weight in hgWeights:
            key = (weight[0][0], weight[0][1], weight[1][0], weight[1][1])
            if key not in seen:
                hgWeightsNoDuplicates.append(weight)
                seen.add(key)

        hgBiasesNoDuplicates = []
        seen = set()
        for biasEntry in hgBiases:
            key = (biasEntry[0], biasEntry[1])
            if key not in seen:
                hgBiasesNoDuplicates.append(biasEntry)
                seen.add(key)

        return hgWeightsNoDuplicates, hgBiasesNoDuplicates
515

516
    def setNetworkInfo(self, biasByFiller=True):
517
        """
518
        Set the role names, filler names, weight matrix, and bias vector for
519
        this HNF grammar.
520
        """
521
        if not self.hgRulesAreSet:
522
            self.setHarmonicGrammarRules()
523

524
        fillerNames = []
525
        roleNames = []
526
        for i in range(len(self.hgBiases)):
527
            if self.hgBiases[i][0] not in fillerNames:
528
                fillerNames.append(self.hgBiases[i][0])
529
            if self.hgBiases[i][1] not in roleNames:
530
                roleNames.append(self.hgBiases[i][1])
531

532
        if self.needNullFiller and self.nullSymbol not in fillerNames:
533
            fillerNames.append(self.nullSymbol)
534

535
        allFRbindings = []
536
        for i in range(len(roleNames)):
537
            for j in range(len(fillerNames)):
538
                allFRbindings.append(fillerNames[j] + '/'+ roleNames[i])
539

540
        weightMatrix = np.zeros((len(allFRbindings), len(allFRbindings)))
541
        for i in range(len(self.hgWeights)):
542
            index1 = allFRbindings.index(self.hgWeights[i][0][0] + '/' + self.hgWeights[i][0][1])
543
            index2 = allFRbindings.index(self.hgWeights[i][1][0] + '/' + self.hgWeights[i][1][1])
544
            weightMatrix[index1, index2] = self.hgWeights[i][2]
545
            weightMatrix[index2, index1] = self.hgWeights[i][2]
546

547
        biasVector_byFiller = np.zeros(len(allFRbindings))
548
        biasVector_byRole = np.zeros(len(allFRbindings))
549
        for i in range(len(self.hgBiases)):
550
            currentFiller = self.hgBiases[i][0]
551
            currentRole = self.hgBiases[i][1]
552
            currentBias = self.hgBiases[i][2]
553
            for j in range(len(allFRbindings)):
554
                if allFRbindings[j][:len(currentFiller)] == currentFiller:
555
                    biasVector_byFiller[j] = currentBias
556
                if allFRbindings[j][-len(currentRole):] == currentRole:
557
                    biasVector_byRole[j] = currentBias
558

559
        self.roleNames = roleNames
560
        self.fillerNames = fillerNames
561
        self.weightMatrix = weightMatrix
562
        self.biasVector_byFiller = biasVector_byFiller
563
        self.biasVector_byRole = biasVector_byRole
564
        self.allFRbindings = allFRbindings
565
        self.networkInfoIsSet = True
566

567
        if biasByFiller:
568
            self.defaultBiasVector = biasVector_byFiller
569
        else:
570
            self.defaultBiasVector = biasVector_byRole
571

572
        if self.isPcfg:
573
            self.adjustBiases()
574

575

576
    # -------------------------------------------------------------------------
577
    # Getters and supporting functions
578

579
    def getHarmony(self, state):
580
        """
581
        Calculate harmony of state input as column vector.
582
        """
583
        if not self.networkInfoIsSet:
584
            self.setNetworkInfo();
585

586
        if isinstance(state, str):
587
            stateVector = self.hnfTreeToState(state)
588
        else:
589
            stateVector = state
590

591
        hWeight = np.dot(np.dot(np.transpose(stateVector), self.weightMatrix), stateVector)
592
        hBias = np.dot(np.transpose(stateVector), self.defaultBiasVector)
593

594
        return ((0.5 * hWeight) + hBias)
595

596
    def getRootNode(self, ruleSet):
        """
        Given a dictionary of rules, return the root node, i.e. the first left-hand side.
        """
        leftHandSides = list(ruleSet.keys())
        return leftHandSides[0] # rules are kept in input order, so this is the root
601

602
    def getTerminalNodes(self, ruleSet):
        """
        Given a dictionary of rules, find all terminal symbols.

        A terminal is a symbol that occurs on some right-hand side but never as a
        left-hand side. Terminals are returned in order of first appearance. Float
        entries (the probabilities of PCFG rules) are not symbols and are skipped.
        """
        terminals = []
        seen = set()
        for lhs in ruleSet.keys():
            for rhs in ruleSet[lhs]:
                for node in rhs:
                    if isinstance(node, float):
                        continue # a rule probability, not a symbol
                    if node not in ruleSet and node not in seen:
                        seen.add(node)
                        terminals.append(node) # must be a terminal

        return terminals
619

620
    def getNetworkInfo(self, biasByFiller=True):
621
        """
622
        Get the information needed to create a weight matrix for use in neural network
623
        computation.
624
        """
625
        if not self.networkInfoIsSet:
626
            self.setNetworkInfo()
627

628
        return self.roleNames, self.fillerNames, self.weightMatrix, self.defaultBiasVector
629

630
    def getWeight(self, binding1, binding2):
631
        """
632
        Get specific weight from weight matrix.
633
        """
634
        binding1_isValid = binding1 in self.allFRbindings
635
        binding2_isValid = binding2 in self.allFRbindings
636
        if not binding1_isValid and not binding2_isValid:
637
            sys.exit('Error: \'' + binding1 + '\' and \'' + binding2 + '\' are not valid filler/role bindings.')
638
        elif not binding1_isValid:
639
            sys.exit('Error: \'' + binding1 + '\' is not a valid filler/role binding.')
640
        elif not binding2_isValid:
641
            sys.exit('Error: \'' + binding2 + '\' is not a valid filler/role binding.')
642
        else:
643
            return self.weightMatrix[self.allFRbindings.index(binding1), self.allFRbindings.index(binding2)]
644

645
    def getBias(self, binding):
646
        """
647
        Get specific bias from bias vector.
648
        """
649
        bindingIsValid = binding in self.allFRbindings
650
        if not bindingIsValid:
651
            sys.exit('Error: \'' + binding + '\' is not a valid filler/role binding.')
652
        else:
653
            return self.defaultBiasVector[self.allFRbindings.index(binding), 0]
654

655

656
    # -------------------------------------------------------------------------
657
    # Pretty 'toString' methods
658

659
    def cnfRulesToString(self):
660
        """
661
        Gets a pretty string for the CNF rules.
662
        """
663
        nRules = 0
664
        for lhs in self.cnfRules.keys():
665
            nRules += len(self.cnfRules[lhs])
666

667
        nStringified = 0
668
        returnString = '{'
669
        for lhs in self.cnfRules.keys():
670
            for rhs in self.cnfRules[lhs]:
671
                if nStringified != 0:
672
                    returnString += ' '
673
                returnString += lhs + ' -> '
674
                for i in range(len(rhs)):
675
                    if self.isPcfg and i == 0:
676
                        returnString += str(rhs[i])
677
                    else:
678
                        returnString += rhs[i]
679
                    if i != len(rhs) - 1:
680
                        returnString += ' '
681
                if nStringified != nRules - 1:
682
                    returnString += '; \n'
683
                nStringified += 1
684
        returnString += '}'
685

686
        return returnString
687

688
    def hnfRulesToString(self):
689
        """
690
        Gets a pretty string for the HNF rules.
691
        """
692
        nRules = 0
693
        for lhs in self.hnfRules.keys():
694
            nRules += len(self.hnfRules[lhs])
695

696
        nStringified = 0
697
        returnString = '{'
698
        for lhs in self.hnfRules.keys():
699
            for rhs in self.hnfRules[lhs]:
700
                if nStringified != 0:
701
                    returnString += ' '
702
                returnString += lhs + ' -> '
703
                for i in range(len(rhs)):
704
                    if self.isPcfg and i == 0:
705
                        returnString += str(rhs[i])
706
                    else:
707
                        returnString += rhs[i]
708
                    if i != len(rhs) - 1:
709
                        returnString += ' '
710
                if nStringified != nRules - 1:
711
                    returnString += '; \n'
712
                nStringified += 1
713
        returnString += '}'
714

715
        return returnString
716

717
    def hgWeightsToString(self):
718
        """
719
        Gets a pretty string for the HG weights.
720
        """
721
        if not self.hgRulesAreSet:
722
            self.setHarmonicGrammarRules()
723

724
        returnString = '{'
725
        for i in range(len(self.hgWeights)):
726
            if i != 0:
727
                returnString += ' '
728
            returnString += '[(' + self.hgWeights[i][0][0] + '/'+ self.hgWeights[i][0][1] \
729
                + ', ' + self.hgWeights[i][1][0] + '/' + self.hgWeights[i][1][1] \
730
                + '), ' + str(self.hgWeights[i][-1]) + ']'
731
            if i != len(self.hgWeights) - 1:
732
                returnString += '; \n'
733
        returnString += '}'
734

735
        return returnString
736

737
    def hgBiasesToString(self):
738
        """
739
        Gets a pretty string for the HG biases.
740
        """
741
        if not self.hgRulesAreSet:
742
            self.setHarmonicGrammarRules()
743

744
        returnString = '{'
745
        for i in range(len(self.hgBiases)):
746
            if i != 0:
747
                returnString += ' '
748
            returnString += '[' + self.hgBiases[i][0] + ', ' + str(self.hgBiases[i][1]) + ']'
749
            if i != len(self.hgBiases) - 1:
750
                returnString += '; \n'
751
        returnString += '}'
752

753
        return returnString
754

755
    def hgRulesToString(self):
756
        """
757
        Concatenates the HG weights and biases and gets a pretty string for them.
758
        """
759
        if not self.networkInfoIsSet:
760
            self.setNetworkInfo()
761

762
        returnString = '{'
763
        for i in range(len(self.hgWeights)):
764
            if i != 0:
765
                returnString += ' '
766
            returnString += '[(' + self.hgWeights[i][0][0] + '/'+ self.hgWeights[i][0][1] \
767
                + ', ' + self.hgWeights[i][1][0] + '/' + self.hgWeights[i][1][1] \
768
                + '), ' + str(self.hgWeights[i][-1]) + '];\n'
769
        for i in range(len(self.hgBiases)):
770
            returnString += ' [' + self.hgBiases[i][0] + ', ' + str(self.hgBiases[i][1]) + ']'
771
            if i != len(self.hgBiases) - 1:
772
                returnString += '; \n'
773
        returnString += '}'
774

775
        if self.isPcfg:
776
            adjustmentsToPrint = []
777
            for adjustment in self.biasAdjustments:
778
                if adjustment[1] > 0 and adjustment[1] < self.INF:
779
                    adjustmentsToPrint.append(adjustment)
780

781
            returnString += ';\n{'
782
            for i in range(len(adjustmentsToPrint)):
783
                if i != 0:
784
                    returnString += ' '
785
                returnString += '[' + adjustmentsToPrint[i][0][0] + '/' + adjustmentsToPrint[i][0][1] + ', ' + \
786
                    str(adjustmentsToPrint[i][1]) + ']'
787
                if i != len(adjustmentsToPrint) - 1:
788
                    returnString += '; \n'
789
            returnString += '}'
790

791
        return returnString
792

793
    def biasVectorToString(self):
794
        """
795
        Gets a pretty string for the bias vector.
796
        """
797
        returnString = ''
798
        for i in range(len(self.allFRbindings)):
799
            returnString += self.allFRbindings[i] + ', ' + str(self.defaultBiasVector[i]) + '\n'
800

801
        return returnString
802

803

804