2020-02-21 19:13:13 +01:00
|
|
|
|
import json
|
2020-02-24 14:36:26 +01:00
|
|
|
|
import re
|
2020-02-21 19:13:13 +01:00
|
|
|
|
from sys import argv
|
|
|
|
|
|
2020-04-11 00:26:05 +02:00
|
|
|
|
try:
|
|
|
|
|
allsymbols = json.load(open('./unicode-latex.json'))
|
|
|
|
|
except:
|
|
|
|
|
allsymbols = json.load(open('../unicode-latex.json'))
|
|
|
|
|
|
2020-04-12 17:30:03 +02:00
|
|
|
|
mysymbols = ['≡', '≠', '≼', '→', '←', '⊀', '⋠', '≺', '∀', 'ε', '₀', '₂', '₁', '₃', '₄', 'ₐ', 'ₖ', 'ᵥ', 'ₘ', 'ₙ', 'ᵢ', 'ⁱ', '⋮', 'ₛ', 'ₜ', '≃', '⇔', '∧', '∅', 'ℕ', 'ⱼ', 'ʲ', '⊥', 'π', 'α', 'β', '∞', 'σ', '≤', '⊈', '∧', '∨', '∃', '⇒', '∩', '∉', '⋃', 'ᵏ', 'ₗ', 'ˡ', 'ₒ', 'ᵣ', 'ᴵ', '≈', '⊆', '↦' ]
|
2020-06-04 23:48:32 +02:00
|
|
|
|
extrasymbols = {'〚': '\llbracket', '〛': r'\rrbracket', '̸': '\neg', '¬̸': '\neg',
|
|
|
|
|
'∈': '\in ', 'ₛ': '_S', 'ₜ': '_T'}
|
2020-02-21 19:13:13 +01:00
|
|
|
|
|
2020-02-24 14:36:26 +01:00
|
|
|
|
symbols = {s: allsymbols[s] for s in mysymbols}
|
2020-04-02 14:14:39 +02:00
|
|
|
|
symbols.update(extrasymbols)
|
|
|
|
|
mathsymbols = {s: '$'+v+'$' for s, v in symbols.items()}
|
2020-02-21 19:13:13 +01:00
|
|
|
|
|
|
|
|
|
def read_by_char(fname):
|
2020-02-24 14:36:26 +01:00
|
|
|
|
# Yield character and True/False if inside mathmode block
|
|
|
|
|
mathmode = False
|
2020-04-07 21:05:08 +02:00
|
|
|
|
mathmode_begin = set(['\\begin{equation*}', '\\begin{equation}', '\[', '\\begin{mathpar}'])
|
|
|
|
|
mathmode_end = set(['\\end{equation*}', '\\end{equation}', '\]', '\\end{mathpar}'])
|
2020-02-24 14:36:26 +01:00
|
|
|
|
cnt = 0
|
2020-02-21 19:13:13 +01:00
|
|
|
|
with open(fname, 'r') as fp:
|
|
|
|
|
for line in fp.readlines():
|
2020-02-24 14:36:26 +01:00
|
|
|
|
cnt += 1
|
|
|
|
|
words = [w.strip() for w in line.split(' ')]
|
2020-02-24 19:46:00 +01:00
|
|
|
|
|
2020-02-24 14:36:26 +01:00
|
|
|
|
if mathmode_begin.intersection(words):
|
2020-06-04 23:48:32 +02:00
|
|
|
|
assert mathmode == False, words
|
2020-02-24 14:36:26 +01:00
|
|
|
|
mathmode = True
|
2020-02-24 19:46:00 +01:00
|
|
|
|
if mathmode_end.intersection(words):
|
2020-02-24 14:36:26 +01:00
|
|
|
|
assert mathmode == True, f'Line: {words}, number: {cnt}'
|
|
|
|
|
mathmode = False
|
|
|
|
|
|
2020-02-21 19:13:13 +01:00
|
|
|
|
for ch in line:
|
2020-02-24 14:36:26 +01:00
|
|
|
|
yield ch, mathmode
|
|
|
|
|
|
|
|
|
|
def convert(ch, mathmode):
|
|
|
|
|
if not mathmode:
|
|
|
|
|
return mathsymbols[ch] if ch in mathsymbols else ch
|
|
|
|
|
else:
|
|
|
|
|
return symbols[ch] if ch in symbols else ch
|
2020-02-21 19:13:13 +01:00
|
|
|
|
|
2020-03-02 14:46:37 +01:00
|
|
|
|
def latex_errors_replacements(charlist):
|
|
|
|
|
text = ''.join(charlist).split(' ')
|
2020-04-11 00:26:05 +02:00
|
|
|
|
replacements = {
|
|
|
|
|
'\n\end{comment}\n\end{enumerate}\n\end{enumerate}\n\n\subsection{Symbolic': '\n\end{comment}\n\n\subsection{Symbolic',
|
|
|
|
|
}
|
2020-03-02 14:46:37 +01:00
|
|
|
|
r_set = set(replacements.keys())
|
|
|
|
|
for word in text:
|
|
|
|
|
it = r_set.intersection(set([word]))
|
|
|
|
|
if it:
|
|
|
|
|
yield from replacements[it.pop()]
|
|
|
|
|
else:
|
|
|
|
|
yield from word
|
|
|
|
|
yield ' '
|
|
|
|
|
|
2020-04-07 21:05:08 +02:00
|
|
|
|
def ll_rr_bracket(charlist):
|
|
|
|
|
llrr_mode = False
|
|
|
|
|
|
|
|
|
|
for i, ch in enumerate(charlist):
|
|
|
|
|
if ch == '\\':
|
|
|
|
|
if charlist[i:i+10] == '\llbracket':
|
|
|
|
|
assert llrr_mode is False ; llrr_mode = True
|
|
|
|
|
elif charlist[i:i+10] == '\rrbracket':
|
|
|
|
|
assert llrr_mode is True ; llrr_mode = False
|
|
|
|
|
|
|
|
|
|
if not (llrr_mode and ch == '$'):
|
|
|
|
|
yield ch
|
|
|
|
|
|
|
|
|
|
|
2020-02-24 14:36:26 +01:00
|
|
|
|
# convert symbols except the one requiring math mode modifiers
|
2020-02-24 14:37:50 +01:00
|
|
|
|
firstpass = [convert(*c) for c in read_by_char(argv[1])]
|
2020-03-02 14:46:37 +01:00
|
|
|
|
# remove a latex error
|
|
|
|
|
secondpass = latex_errors_replacements(firstpass)
|
2020-04-07 21:05:08 +02:00
|
|
|
|
thirdpass = ll_rr_bracket(list(secondpass))
|
2020-02-21 19:13:13 +01:00
|
|
|
|
|
2020-04-07 21:05:08 +02:00
|
|
|
|
newfile = ''.join(thirdpass)
|
2020-02-21 19:13:13 +01:00
|
|
|
|
with open(argv[2], 'w') as f:
|
2020-02-24 14:36:26 +01:00
|
|
|
|
f.write(newfile)
|