UniTO/tesi/conv.py

46 lines
1.6 KiB
Python
Raw Normal View History

2020-02-21 19:13:13 +01:00
import json
2020-02-24 14:36:26 +01:00
import re
2020-02-21 19:13:13 +01:00
from sys import argv
# Full lookup table loaded from the JSON file in the current working
# directory. The file is opened in a context manager so the handle is closed
# deterministically, and decoded as UTF-8 regardless of the platform locale.
# Presumably maps Unicode characters to LaTeX commands (inferred from the
# file name and the string concatenation below) -- TODO confirm.
with open('./unicode-latex.json', encoding='utf-8') as symbols_file:
    allsymbols = json.load(symbols_file)

# The subset of symbols this script converts.
# NOTE(review): every entry reads as an empty string here; presumably the
# original Unicode characters were lost somewhere along the way -- confirm
# against the authoritative copy of the file before running.
mysymbols = ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '']

# Replacement used when the character is already inside math mode.
symbols = {s: allsymbols[s] for s in mysymbols}
# Replacement used outside math mode: the same value wrapped in `$...$`.
mathsymbols = {s: '$' + latex + '$' for s, latex in symbols.items()}
2020-02-21 19:13:13 +01:00
def read_by_char(fname):
    r"""Yield ``(character, mathmode)`` pairs for every character of a file.

    The file is read line by line as UTF-8. Each line is split on single
    spaces and the pieces are stripped; when one of the pieces is exactly
    ``\begin{equation}`` or ``\begin{equation*}`` the math-mode flag is
    switched on, and when one is exactly ``\end{equation}`` or
    ``\end{equation*}`` it is switched off. The flag is updated *before* the
    characters of the line are yielded, so the characters of a ``\begin``
    line are reported as inside math mode and those of an ``\end`` line as
    outside.

    Args:
        fname: Path of the text file to read.

    Yields:
        Tuples ``(ch, mathmode)`` where ``ch`` is a single character
        (newlines included) and ``mathmode`` is a bool.

    Raises:
        AssertionError: If an equation block is opened while one is already
            open, or closed while none is open.
    """
    mathmode_begin = {'\\begin{equation*}', '\\begin{equation}'}
    mathmode_end = {'\\end{equation*}', '\\end{equation}'}
    mathmode = False

    # Explicit encoding: the input contains non-ASCII math symbols and must
    # not depend on the platform's default locale.
    with open(fname, 'r', encoding='utf-8') as fp:
        # Iterate the file lazily instead of materialising every line.
        for cnt, line in enumerate(fp, start=1):
            words = [w.strip() for w in line.split(' ')]
            if mathmode_begin.intersection(words):
                # Raised explicitly (rather than via `assert`) so the check
                # survives `python -O`; the exception type is unchanged.
                if mathmode:
                    raise AssertionError(f'Line: {words}, number: {cnt}')
                mathmode = True
            elif mathmode_end.intersection(words):
                if not mathmode:
                    raise AssertionError(f'Line: {words}, number: {cnt}')
                mathmode = False

            for ch in line:
                yield ch, mathmode
def convert(ch, mathmode):
    """Return the replacement for ``ch``, or ``ch`` itself when unmapped.

    Inside math mode the lookup uses the module-level ``symbols`` table;
    outside math mode it uses ``mathsymbols``, whose values are wrapped in
    ``$...$``.

    Args:
        ch: The character to translate.
        mathmode: Truthy when ``ch`` was read inside a math-mode block.

    Returns:
        The mapped string if ``ch`` is a key of the selected table,
        otherwise ``ch`` unchanged.
    """
    table = symbols if mathmode else mathsymbols
    return table.get(ch, ch)
2020-02-21 19:13:13 +01:00
2020-02-24 14:36:26 +01:00
# Driver: argv[1] is the input file, argv[2] the output file.
if len(argv) < 3:
    # Fail with a usage message instead of a bare IndexError further down.
    raise SystemExit('usage: conv.py INPUT OUTPUT')

# Convert symbols except the ones requiring math mode modifiers.
# Every pass produces a list of words that must be joined by ' '.join( ).
firstpass = ''.join(
    convert(ch, mathmode) for ch, mathmode in read_by_char(argv[1])
).split(' ')
# secondpass = insert_math(''.join(firstpass).split(' '))
# thirdpass = escape_outside_mathmode(firstpass)

newfile = ' '.join(firstpass)

# Explicit encoding so characters that were not converted can still be
# written regardless of the platform's default locale.
with open(argv[2], 'w', encoding='utf-8') as f:
    f.write(newfile)