UniTO/tesi/conv.py
Francesco Mecca e4fc6b92aa inizio source
2020-03-03 17:18:40 +01:00

58 lines
2 KiB
Python

import json
import re
from sys import argv
# Load the complete character -> replacement table from the JSON file in the
# current working directory.  The `with` block closes the handle right after
# parsing, and the encoding is pinned to UTF-8 so the non-ASCII keys are
# decoded the same way regardless of the platform's locale.
with open('./unicode-latex.json', encoding='utf-8') as _symbols_file:
    allsymbols = json.load(_symbols_file)
# Characters this script rewrites.  Every entry must be a key of `allsymbols`,
# otherwise building `symbols` below raises KeyError.
# NOTE(review): most entries read as empty strings here -- possibly symbols
# lost to an encoding/extraction problem; confirm against the original file.
mysymbols = ['', '', '', '', '', '', '', '', '', '', 'ε','', '', '', '', '', '', '', '', '', '', '']
# Character -> replacement text copied from `allsymbols` (presumably a LaTeX
# command); `convert` uses this table when the character is in math mode.
symbols = {s: allsymbols[s] for s in mysymbols}
# Same keys, with the replacement wrapped in `$...$`; `convert` uses this
# table when the character is outside math mode.
mathsymbols = {s: '$'+allsymbols[s]+'$' for s in symbols}
def read_by_char(fname):
    r"""Yield ``(character, mathmode)`` for every character of a text file.

    The file is read as UTF-8, line by line.  A line switches math mode on
    when one of its space-separated tokens (after stripping whitespace) is
    exactly ``\begin{equation}`` or ``\begin{equation*}``, and switches it
    off when a token is exactly ``\end{equation}`` or ``\end{equation*}``.
    The flag is updated before the line's characters are yielded, so the
    characters of a ``\begin`` line carry ``True`` and those of an ``\end``
    line carry ``False``.

    Args:
        fname: Path of the file to read.

    Yields:
        Tuples ``(ch, mathmode)`` where ``ch`` is a single character
        (newlines included) and ``mathmode`` is a bool.

    Raises:
        AssertionError: If a begin marker is seen while already in math
            mode, or an end marker is seen while not in math mode.
    """
    mathmode = False
    mathmode_begin = {'\\begin{equation*}', '\\begin{equation}'}
    mathmode_end = {'\\end{equation*}', '\\end{equation}'}
    # Explicit UTF-8: the symbols being matched are non-ASCII, so decoding
    # must not depend on the platform's locale.
    with open(fname, 'r', encoding='utf-8') as fp:
        # Iterate the file lazily; `cnt` is the 1-based line number used in
        # the diagnostic below.
        for cnt, line in enumerate(fp, start=1):
            words = [w.strip() for w in line.split(' ')]
            if mathmode_begin.intersection(words):
                # Nested equation blocks are not expected.
                assert not mathmode
                mathmode = True
            if mathmode_end.intersection(words):
                assert mathmode, f'Line: {words}, number: {cnt}'
                mathmode = False
            for ch in line:
                yield ch, mathmode
def convert(ch, mathmode):
    """Return the replacement for ``ch``, or ``ch`` itself if it has none.

    When ``mathmode`` is truthy the bare entry from ``symbols`` is used;
    otherwise the ``$``-wrapped entry from ``mathsymbols`` is used.
    """
    table = symbols if mathmode else mathsymbols
    return table.get(ch, ch)
def latex_errors_replacements(charlist):
    """Patch a known broken LaTeX fragment in the converted text.

    The pieces of ``charlist`` are joined and split on single spaces.  Any
    resulting word that is exactly equal to a key of the replacement table
    is swapped for its value; every other word passes through unchanged.
    Words are re-joined with single spaces, so text without a matching word
    round-trips exactly (no trailing space is appended).

    Args:
        charlist: Iterable of strings that together make up the text.

    Yields:
        The characters of the patched text, one at a time.
    """
    # Backslashes are escaped explicitly: `\\e` and `\\s` written with a
    # single backslash are invalid escape sequences and trigger a warning on
    # recent Python versions.  The runtime values are unchanged.
    replacements = {
        '\n\\end{comment}\n\\end{enumerate}\n\\end{enumerate}\n\n\\subsection{Symbolic':
            '\n\\end{comment}\n\n\\subsection{Symbolic',
    }
    words = ''.join(charlist).split(' ')
    for index, word in enumerate(words):
        # Emit the separator only between words, never after the last one.
        if index:
            yield ' '
        yield from replacements.get(word, word)
# Pass 1: run every (character, math-mode flag) pair from the input file
# (argv[1]) through convert(), which substitutes the configured symbols.
firstpass = [convert(ch, in_math) for ch, in_math in read_by_char(argv[1])]
# Pass 2: patch a known LaTeX error in the converted text.
secondpass = latex_errors_replacements(firstpass)
# Assemble the final text and write it to the output file (argv[2]).
newfile = ''.join(secondpass)
with open(argv[2], 'w') as f:
    f.write(newfile)