# Version number as a tuple of its components.
__version_info__ = (0, 3, 9)
# The same version number as a dotted string.
__version__ = '.'.join(str(part) for part in __version_info__)
__author__ = "Matthew Young"
import re
from markdown2 import markdown
def break_tie(inline, equation):
    """Pick the winning block type when both opening delimiters match at the same spot.

    When one delimiter is a prefix of the other (for example $ and $$) the two
    opening matches can start at the same index. The longer matched delimiter
    takes priority.

    inline and equation are the regex match objects for the inline and the
    equation opening delimiter respectively.

    Returns 1 (inline block) when the inline match is strictly longer than the
    equation match, otherwise 2 (equation block); equal lengths also give 2.
    """
    inline_width = inline.end() - inline.start()
    equation_width = equation.end() - equation.start()
    if inline_width > equation_width:
        return 1
    return 2
def markdown_safe(placeholder):
    """Tell whether the placeholder survives markdown processing untouched.

    The placeholder is rendered through markdown on its own. It is considered
    safe only when the output starts with a single paragraph wrapper
    (<p>...</p> followed by a newline) whose content equals the placeholder.

    Returns True when the placeholder is unchanged, False otherwise.
    """
    paragraph = re.compile("<p>(.*)</p>\n")
    rendered = markdown(placeholder)
    wrapped = paragraph.match(rendered)
    if not wrapped:
        # The output is not a plain paragraph, so markdown rewrote the text.
        return False
    return wrapped.group(1) == placeholder
def mathdown(text):
    """Run the whole pipeline on text: strip the math, apply markdown, restore the math.

    Uses the default delimiters and placeholder of sanitizeInput and
    reconstructMath. Returns the resulting string.
    """
    sanitized, equations = sanitizeInput(text)
    rendered = markdown(sanitized)
    return reconstructMath(rendered, equations)
def sanitizeInput(string, inline_delims=["$", "$"], equation_delims=["$$", "$$"], placeholder="$0$"):
    """Replace every math block in string with a placeholder that markdown ignores.

    The text between the math delimiters is cut out and stored, and a single
    placeholder is left in its place. The result can be passed through
    markdown and handed to reconstructMath to put the math back.

    Parameters:
        string          -- the text that will later be given to markdown.
        inline_delims   -- [left, right] delimiters of inline math.
        equation_delims -- [left, right] delimiters of equation (display) math.
        placeholder     -- text substituted for each math block; it must pass
                           through markdown unchanged.

    A delimiter or placeholder directly preceded by a backslash is ignored.

    Returns a tuple (sanitized_string, codeblocks). Every entry of codeblocks
    is one character naming its kind followed by the stored text:
        '0' -- a placeholder that was already present in string (the text
               itself is left in place and the entry only keeps the count of
               placeholders in step with the list),
        '1' -- the content of an inline block,
        '2' -- the content of an equation block.

    If an opening delimiter has no closing delimiter, the rest of the text
    from that opening delimiter onward is copied unchanged and scanning stops.

    Raises ValueError when the placeholder is altered by markdown.
    """
    if not markdown_safe(placeholder):
        raise ValueError("Placeholder %s altered by markdown processing." % placeholder)

    def unescaped(literal):
        # Match the literal text only where no backslash sits right before it.
        return re.compile("(?<!\\\\)" + re.escape(literal))

    open_inline = unescaped(inline_delims[0])
    close_inline = unescaped(inline_delims[1])
    open_equation = unescaped(equation_delims[0])
    close_equation = unescaped(equation_delims[1])
    literal_placeholder = unescaped(placeholder)

    # Slot 0 follows placeholders already present in the text, slot 1 inline
    # math and slot 2 equation math. Slot 0 never needs a closing scanner.
    openers = [literal_placeholder.scanner(string),
               open_inline.scanner(string),
               open_equation.scanner(string)]
    closers = [None, close_inline.scanner(string), close_equation.scanner(string)]

    text_end = len(string)
    # Next candidate opening match per slot, and where it starts (text_end
    # stands in for "no further match").
    found = [scanner.search() for scanner in openers]
    begins = [hit.start() if hit else text_end for hit in found]

    cursor = -1       # end of the text consumed so far; -1 before anything is consumed
    last_close = -1   # start of the most recent closing delimiter returned by a scanner
    pieces = []       # fragments of the sanitized output, joined on return
    codeblocks = []

    while True:
        # Drop candidate openings that start inside text already consumed.
        for slot in range(3):
            while found[slot] and found[slot].start() < cursor:
                found[slot] = openers[slot].search()
                begins[slot] = found[slot].start() if found[slot] else text_end

        if found[0] and min(begins) == begins[0]:
            # A literal placeholder comes first: keep it in the text and
            # record a dummy entry for it.
            codeblocks.append('0' + placeholder)
            resume = begins[0] + len(placeholder)
            pieces.append(string[max(cursor, 0):resume])
            cursor = resume
            continue

        if found[1] is None and found[2] is None:
            # No math left: copy the remainder and finish.
            pieces.append(string[max(cursor, 0):])
            return (''.join(pieces), codeblocks)

        if found[1] is None:
            kind = 2
        elif found[2] is None:
            kind = 1
        elif begins[1] < begins[2]:
            kind = 1
        elif begins[1] > begins[2]:
            kind = 2
        else:
            # Both delimiters open at the same index.
            kind = break_tie(found[1], found[2])

        # Copy the plain text in front of the block, then step over the
        # opening delimiter.
        pieces.append(string[max(cursor, 0):begins[kind]])
        cursor = found[kind].end()

        # The closing scanner resumes where it last stopped, so skip the
        # closing delimiters that lie before the current block's content.
        while last_close < cursor:
            closing = closers[kind].search()
            if closing is None:
                # Unterminated block: emit it verbatim, opening delimiter included.
                pieces.append(string[begins[kind]:])
                return (''.join(pieces), codeblocks)
            last_close = closing.start()

        codeblocks.append(str(kind) + string[cursor:closing.start()])
        pieces.append(placeholder)
        cursor = closing.end()
def reconstructMath(processedString, codeblocks, inline_delims=["$", "$"], equation_delims=["$$", "$$"], placeholder="$0$", htmlSafe=False):
    """Put stored math blocks back into text that has been through markdown.

    The arguments are normally the output of sanitizeInput, with the sanitized
    string passed through markdown first. The delimiters and placeholder must
    match the ones used when the text was sanitized.

    Parameters:
        processedString -- the markdown output still containing placeholders.
        codeblocks      -- list of strings; the first character of each is the
                           kind ('0' pre-existing placeholder, '1' inline,
                           '2' equation) and the rest is the stored text.
        inline_delims   -- [left, right] delimiters written around inline math.
        equation_delims -- [left, right] delimiters written around equations.
        placeholder     -- the placeholder text to look for; occurrences
                           directly preceded by a backslash are ignored.
        htmlSafe        -- when True, the less-than, greater-than and ampersand
                           characters in the stored text are replaced by their
                           HTML character entities. An ampersand that already
                           starts one of the amp, lt or gt entities is left
                           alone so existing entities are not escaped twice.

    Returns the string with each placeholder, in order, replaced by the
    matching codeblock wrapped in its delimiters. The caller's codeblocks list
    is not modified.

    Raises ValueError when there are more codeblocks than placeholders.
    """
    # Kind 0 (a placeholder that was in the original text) gets no delimiters.
    delims = [['', ''], inline_delims, equation_delims]
    placeholder_re = re.compile("(?<!\\\\)" + re.escape(placeholder))

    if htmlSafe:
        # The entity text is assembled from the entity names so that the
        # replacements cannot degrade into no-ops if this source is ever passed
        # through an HTML-entity decoder.
        amp, lt, gt = ("&" + name + ";" for name in ("amp", "lt", "gt"))
        bare_amp = re.compile("&(?!(?:amp;|lt;|gt;))")
        # Ampersands go first so the entities added for < and > stay intact.
        # A new list is built to leave the caller's list untouched.
        codeblocks = [
            bare_amp.sub(amp, block).replace("<", lt).replace(">", gt)
            for block in codeblocks
        ]

    pieces = []
    scan = placeholder_re.scanner(processedString)
    post = 0  # end of the previously replaced placeholder
    for block in codeblocks:
        kind = int(block[0])
        match = scan.search()
        if not match:
            raise ValueError("More codeblocks given than valid placeholders in text.")
        pieces.append(processedString[post:match.start()])
        pieces.append(delims[kind][0])
        pieces.append(block[1:])
        pieces.append(delims[kind][1])
        post = match.end()
    # Whatever follows the last placeholder is copied through unchanged.
    pieces.append(processedString[post:])
    return ''.join(pieces)
def findBoundaries(string):
    """Deprecated: find the $ and $$ math boundaries by scanning character by character.

    A $ directly preceded by a backslash is not treated as a delimiter.

    Returns a tuple (oned, twod) of index lists:
        oned -- positions of the single $ characters that open and close
                inline blocks, in the order they are met,
        twod -- positions of the second $ of every $$ pair that opens or
                closes an equation block.
    A lone $ that ends the string is recorded in oned as an opening boundary.
    """
    last = ''
    twod = []
    oned = []
    # True while the previous character was an unescaped $ whose role (single
    # delimiter or first half of a $$) is not yet decided.
    boundary = False
    inoned = False
    intwod = False
    for count, char in enumerate(string):
        if char == "$" and last != '\\':
            if inoned:
                # Any unescaped $ closes an open inline block.
                oned.append(count)
                inoned = False
            elif intwod:
                if boundary:
                    # Second $ in a row: the equation block ends here.
                    twod.append(count)
                    intwod = False
                    boundary = False
                else:
                    boundary = True
            elif boundary:
                # Second $ in a row outside any block: an equation block opens.
                twod.append(count)
                intwod = True
                boundary = False
            else:
                boundary = True
        elif boundary:
            # The pending $ was followed by an ordinary character.
            if inoned:
                # Single-argument call form prints the same text on Python 2
                # and is valid syntax on Python 3.
                print("THIS SHOULD NEVER HAPPEN!")
            elif intwod:
                # A lone $ inside an equation block is not a delimiter.
                pass
            else:
                # The pending $ (one position back) opened an inline block.
                oned.append(count - 1)
                inoned = True
            boundary = False
        last = char
    if boundary:
        # The string ended right after a pending $.
        if not (inoned or intwod):
            oned.append(count)
            inoned = True
    return (oned, twod)