UniTO/tesi/conv.py

115 lines
3.9 KiB
Python
Raw Normal View History

2020-02-21 19:13:13 +01:00
import json
2020-02-24 14:36:26 +01:00
import re
2020-02-21 19:13:13 +01:00
from sys import argv
2020-04-11 00:26:05 +02:00
# Load the Unicode -> LaTeX lookup table.  The file is searched for in the
# current directory first and one directory up as a fallback.  Only
# "could not open the file" errors trigger the fallback, so a malformed JSON
# file is reported instead of being silently skipped.
try:
    with open('./unicode-latex.json', encoding='utf-8') as _symbols_file:
        allsymbols = json.load(_symbols_file)
except OSError:
    with open('../unicode-latex.json', encoding='utf-8') as _symbols_file:
        allsymbols = json.load(_symbols_file)
2020-06-29 17:44:43 +02:00
# Characters whose LaTeX translation is taken from `allsymbols`.
# NOTE(review): many entries read as empty strings in this copy of the file;
# presumably the original glyphs were lost in transit. As written, the lookup
# `allsymbols[s]` would need an '' key to succeed -- confirm against the
# repository before relying on this list.
mysymbols = ['', '', '', '', '', '', '', '', '', 'ε', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'ʲ', '', 'π', 'α', 'β', '', 'σ', '', '', '', '', '', '', '', '', '', '', '', 'ˡ', '', '', '', '', '', '', 'Θ', 'ρ', '', '', '' ]
2020-06-04 23:48:32 +02:00
# Hand-written translations merged on top of the JSON table.
# Every LaTeX macro is a raw string so the backslash stays literal: a plain
# '\neg' is a newline followed by "eg", not the \neg macro.
# NOTE(review): several keys read as '' in this copy (presumably glyphs lost
# in transit); duplicate '' keys collapse to the last entry -- confirm the
# real keys against the repository.
extrasymbols = {'': r'\llbracket', '': r'\rrbracket', '̸': r'\neg', '¬̸': r'\neg',
                '': r'\in ', '': '_S', '': '_T'}
2020-02-21 19:13:13 +01:00
2020-02-24 14:36:26 +01:00
# Translation table used inside math mode: the selected JSON entries first,
# then the hand-written extras, which win when a key appears in both.
symbols = {
    **{glyph: allsymbols[glyph] for glyph in mysymbols},
    **extrasymbols,
}
# Same table for use outside math mode: each translation wrapped in '$...$'.
mathsymbols = {
    glyph: ''.join(('$', latex, '$')) for glyph, latex in symbols.items()
}
2020-02-21 19:13:13 +01:00
def read_by_char(fname):
    """Yield ``(character, mathmode)`` pairs for every character of a file.

    The math-mode flag is tracked per line: it is switched on by a line
    containing one of the opening delimiters and switched off by a line
    containing one of the closing delimiters. Delimiters are recognised only
    as whole space-separated words. The flag is updated before the line's
    characters are yielded, so an opening line is yielded with the flag set,
    a closing line with the flag cleared, and a line that both opens and
    closes an environment is yielded entirely with the flag cleared.

    Args:
        fname: Path of the text file to read. It is decoded as UTF-8
            regardless of the locale, since the input carries Unicode
            symbols.

    Yields:
        ``(ch, mathmode)`` tuples, one per character, newlines included.

    Raises:
        AssertionError: If an opening delimiter is met while already in math
            mode, or a closing delimiter while outside it.
    """
    # Raw strings for the bracket delimiters: '\[' is an invalid escape
    # sequence that only works by accident and triggers a SyntaxWarning.
    mathmode_begin = {'\\begin{equation*}', '\\begin{equation}', r'\[', '\\begin{mathpar}'}
    mathmode_end = {'\\end{equation*}', '\\end{equation}', r'\]', '\\end{mathpar}'}

    mathmode = False
    with open(fname, 'r', encoding='utf-8') as fp:
        # Iterate the file lazily; cnt is the 1-based line number used in
        # the diagnostic below.
        for cnt, line in enumerate(fp, start=1):
            words = [w.strip() for w in line.split(' ')]

            if mathmode_begin.intersection(words):
                assert mathmode == False, words
                mathmode = True
            if mathmode_end.intersection(words):
                assert mathmode == True, f'Line: {words}, number: {cnt}'
                mathmode = False
            for ch in line:
                yield ch, mathmode
def convert(ch, mathmode):
    """Translate one character to LaTeX, leaving unknown characters alone.

    Args:
        ch: The character to translate.
        mathmode: Truthy when the character sits inside a math block; the
            plain ``symbols`` table is used then. Otherwise the
            ``mathsymbols`` table is used.

    Returns:
        The table entry for ``ch``, or ``ch`` itself when it has none.
    """
    table = symbols if mathmode else mathsymbols
    if ch in table:
        return table[ch]
    return ch
2020-02-21 19:13:13 +01:00
2020-03-02 14:46:37 +01:00
def latex_errors_replacements(charlist):
    """Re-emit the text character by character, patching known bad fragments.

    The pieces of ``charlist`` are joined and split on single spaces; any
    resulting word that is exactly a key of the replacement table is swapped
    for its value. Spaces are put back *between* words only, so the output
    has the same spacing as the input (no extra trailing space).

    Args:
        charlist: Iterable of strings (typically single characters or short
            LaTeX snippets) that together form the document text.

    Yields:
        The characters of the patched text, one at a time.
    """
    # Backslashes are written explicitly: '\e' and '\s' are invalid escape
    # sequences that only happen to yield a literal backslash.
    replacements = {
        '\n\\end{comment}\n\\end{enumerate}\n\\end{enumerate}\n\n\\subsection{Symbolic':
            '\n\\end{comment}\n\n\\subsection{Symbolic',
    }
    first = True
    for word in ''.join(charlist).split(' '):
        if not first:
            # Restore the separator consumed by split(' ').
            yield ' '
        first = False
        yield from replacements.get(word, word)
2020-04-07 21:05:08 +02:00
def ll_rr_bracket(charlist):
    r"""Yield the text without the ``$`` found inside semantic brackets.

    Every character is passed through unchanged, except ``$`` characters
    located after the backslash of a ``\llbracket`` and before the backslash
    of the next ``\rrbracket``.

    The macros are matched against the joined text: comparing a slice of a
    list with a string is always False, and a non-raw ``'\rrbracket'``
    starts with a carriage return, so neither macro could ever be detected.

    Args:
        charlist: A string, or an iterable of strings, forming the text.

    Yields:
        The characters of the filtered text, one at a time.

    Raises:
        AssertionError: If ``\llbracket`` is met while a bracket is already
            open, or ``\rrbracket`` while none is open.
    """
    text = charlist if isinstance(charlist, str) else ''.join(charlist)
    llrr_mode = False
    for i, ch in enumerate(text):
        if ch == '\\':
            if text.startswith(r'\llbracket', i):
                assert llrr_mode is False
                llrr_mode = True
            elif text.startswith(r'\rrbracket', i):
                assert llrr_mode is True
                llrr_mode = False
        if not (llrr_mode and ch == '$'):
            yield ch
2020-06-30 14:07:10 +02:00
def include_files(text):
    r"""Replace include-marker lines with the content of files in ``traces/``.

    A line containing the marker ``;; include\_file `` (note the
    LaTeX-escaped underscore) is replaced by three entries: the opening of a
    Verbatim environment labelled with the file name, the raw content of
    ``traces/<name>``, and the closing ``\end{Verbatim}``. The file name is
    whatever follows the marker on that line; anything before the marker is
    dropped. All other lines are passed through unchanged.

    NOTE(review): the name is used as written after the marker; if names in
    the input carry LaTeX escapes such as ``\_`` they are not unescaped
    here -- confirm against the inputs.

    Args:
        text: The whole document as a single string.

    Returns:
        A list of strings meant to be joined with newlines by the caller.

    Raises:
        AssertionError: If ``text`` is not a string.
        OSError: If a referenced file cannot be opened.
    """
    def put_header(key):
        # Opening of the Verbatim environment, with `key` shown as its label.
        header = '\n'.join([
            '\\begin{Verbatim}[fontsize=\\footnotesize,',
            'frame=lines, % top and bottom rule only',
            'framesep=2em, % separation between frame and text',
            'rulecolor=\\color{Gray},',
            'label=\\fbox{\\color{Black}REPLACEME2},',
            'labelposition=topline,',
            ']',
        ])
        return header.replace('REPLACEME2', key)

    assert isinstance(text, str)
    key = ';; include\\_file '
    result = []
    for line in text.split('\n'):
        # partition() takes the name from wherever the marker sits, instead
        # of assuming the marker starts at column 0.
        _, found, name = line.partition(key)
        if not found:
            result.append(line)
            continue
        result.append(put_header(name))
        with open(f"traces/{name}", 'r', encoding='utf-8') as f:
            result.append(f.read())
        result.append('\\end{Verbatim}')
    return result
2020-04-07 21:05:08 +02:00
2020-02-24 14:36:26 +01:00
# Driver: argv[1] is the input file, argv[2] the output file.

# Pass 1: convert symbols, except the ones requiring math mode modifiers.
firstpass = [convert(*c) for c in read_by_char(argv[1])]
# Pass 2: remove a known LaTeX error from the generated text.
secondpass = latex_errors_replacements(firstpass)
# Pass 3: handle '$' around \llbracket ... \rrbracket (needs a sequence, hence list()).
thirdpass = ll_rr_bracket(list(secondpass))
# Pass 4: expand include markers; the result is a list of lines/chunks.
fourthpass = include_files(''.join(thirdpass))

newfile = '\n'.join(fourthpass)
# Write as UTF-8 explicitly so the output does not depend on the locale.
with open(argv[2], 'w', encoding='utf-8') as f:
    f.write(newfile)