"""Replace Unicode math symbols in a LaTeX source file with LaTeX commands.

Usage: python <this script> INPUT OUTPUT

The conversion runs in three passes:

1. every character is looked up in the symbol table; outside display-math
   blocks the command is wrapped in ``$...$``, inside it is emitted bare;
2. a known broken LaTeX fragment is replaced by its corrected form;
3. ``$`` signs between ``\\llbracket`` and ``\\rrbracket`` are removed so the
   bracketed expression forms a single inline-math span.
"""
import json
import re
from sys import argv

# JSON file mapping Unicode characters to LaTeX commands.
SYMBOLS_JSON = '/home/user/UNITO/anno3/vpc/consegne/conv/unicode-latex.json'

# Characters whose translation is taken from SYMBOLS_JSON.
mysymbols = [
    '≡', '≠', '≼', '→', '←', '⊀', '⋠', '≺', '∀', 'ε', '₀', '₂', '₁', '₃',
    '₄', '₅', 'ₐ', 'ₖ', 'ᵥ', 'ₘ', 'ₙ', 'ᵢ', 'ⁱ', '⋮', 'ₛ', 'ₜ', '≃', '⇔',
    '∧', '∅', 'ℕ', 'ⱼ', 'ʲ', '⊥', 'π', 'α', 'β', '∞', 'σ', '≤', '⊈', '∧',
    '∨', '∃', '⇒', '∩', '∉', '⋃', 'ᵏ', 'ₗ', 'ˡ', 'ₒ', 'ᵣ', 'ᴵ', '≈', '⊆',
    '↦', '∪', '⊂', '℘', 'ᶜ', '⁺', 'ⁿ', 'Σ', '⁻', '∑', 'ₚ', 'τ',
]

# Translations defined here; they are applied after (and so override) the
# entries taken from the JSON table.
# Raw strings are required: a plain '\neg' is newline + "eg", and a plain
# '\rrbracket' starts with a carriage return.
# NOTE(review): the '¬̸' key appears to be two code points; convert() looks up
# one character at a time, so that entry can never match -- confirm intent.
extrasymbols = {
    '∈': r'\in',
    '〚': r'\llbracket',
    '〛': r'\rrbracket',
    '̸': r'\neg',
    '¬̸': r'\neg',
    'ℤ': r'\mathbb{Z}',
}

# Lookup tables used by convert(); empty until load_symbols() has run.
allsymbols = {}   # full content of SYMBOLS_JSON
symbols = {}      # character -> bare LaTeX command (used inside math mode)
mathsymbols = {}  # character -> '$command$' (used outside math mode)

# Whitespace-delimited tokens that open / close a display-math block.
_MATHMODE_BEGIN = {
    r'\begin{equation*}', r'\begin{equation}', r'\[', r'\begin{mathpar}',
}
_MATHMODE_END = {
    r'\end{equation*}', r'\end{equation}', r'\]', r'\end{mathpar}',
}

# Space-delimited "words" (they may contain newlines) that are known LaTeX
# errors in the input, mapped to their corrected form.
_LATEX_ERROR_REPLACEMENTS = {
    '\n\\end{comment}\n\\end{enumerate}\n\\end{enumerate}\n\n\\subsection{Symbolic':
        '\n\\end{comment}\n\n\\subsection{Symbolic',
}

_LLBRACKET = r'\llbracket'
_RRBRACKET = r'\rrbracket'


def load_symbols(path=SYMBOLS_JSON):
    """Fill ``allsymbols``, ``symbols`` and ``mathsymbols`` from *path*.

    Raises:
        OSError: if *path* cannot be opened.
        KeyError: if a character of ``mysymbols`` is missing from the table.
    """
    with open(path, encoding='utf-8') as fp:
        allsymbols.update(json.load(fp))
    symbols.update({s: allsymbols[s] for s in mysymbols})
    symbols.update(extrasymbols)
    mathsymbols.update({s: '$' + v + '$' for s, v in symbols.items()})


def read_by_char(fname):
    """Yield ``(character, mathmode)`` for every character of file *fname*.

    ``mathmode`` is decided once per line: it turns True on a line that
    contains an opening token and False on a line that contains a closing
    token, and all characters of the line share the resulting value.
    A line holding both an opener and a closer is therefore reported as
    non-math in its entirety.

    Raises:
        AssertionError: if a block is opened while one is already open, or
            closed while none is open.
    """
    mathmode = False
    with open(fname, 'r', encoding='utf-8') as fp:
        for cnt, line in enumerate(fp, start=1):
            words = [w.strip() for w in line.split(' ')]
            if _MATHMODE_BEGIN.intersection(words):
                if mathmode:
                    raise AssertionError(f'Line: {words}, number: {cnt}')
                mathmode = True
            if _MATHMODE_END.intersection(words):
                if not mathmode:
                    raise AssertionError(f'Line: {words}, number: {cnt}')
                mathmode = False
            for ch in line:
                yield ch, mathmode


def convert(ch, mathmode):
    """Return the LaTeX replacement for *ch*, or *ch* itself if it has none.

    Outside math mode the command is wrapped in ``$...$``; inside math mode
    the bare command is returned.
    """
    table = symbols if mathmode else mathsymbols
    return table.get(ch, ch)


def latex_errors_replacements(charlist):
    """Yield the characters of *charlist* with known LaTeX errors corrected.

    The text is split on single spaces and every word that exactly matches a
    known erroneous fragment is swapped for its replacement.  Spaces are
    emitted between words only, so text without a match passes through
    unchanged (no trailing space is appended).
    """
    words = ''.join(charlist).split(' ')
    for index, word in enumerate(words):
        if index:
            yield ' '
        yield from _LATEX_ERROR_REPLACEMENTS.get(word, word)


def ll_rr_bracket(charlist):
    """Yield the characters of *charlist*, dropping ``$`` inside brackets.

    Every ``$`` located after a ``\\llbracket`` and before the matching
    ``\\rrbracket`` is removed, which merges the separately wrapped symbols
    into one math span opened before ``\\llbracket`` and closed after
    ``\\rrbracket``.

    NOTE(review): a removed ``$`` may leave a command directly followed by a
    letter (``$\\llbracket$x`` becomes ``\\llbracketx``); check the output
    where bracket contents are not separated by a space.

    Raises:
        AssertionError: if the brackets are nested or unbalanced.
    """
    # Join first: the command names span several characters, and a list
    # slice never compares equal to a string.
    text = ''.join(charlist)
    inside = False
    for i, ch in enumerate(text):
        if ch == '\\':
            if text.startswith(_LLBRACKET, i):
                if inside:
                    raise AssertionError(f'nested {_LLBRACKET} at offset {i}')
                inside = True
            elif text.startswith(_RRBRACKET, i):
                if not inside:
                    raise AssertionError(f'unmatched {_RRBRACKET} at offset {i}')
                inside = False
        if not (inside and ch == '$'):
            yield ch


def main(args):
    """Convert the file ``args[1]`` and write the result to ``args[2]``."""
    if len(args) < 3:
        raise SystemExit('usage: python <script> INPUT OUTPUT')
    load_symbols()
    # convert symbols except the ones requiring math mode modifiers
    firstpass = [convert(*c) for c in read_by_char(args[1])]
    # remove a latex error
    secondpass = latex_errors_replacements(firstpass)
    thirdpass = ll_rr_bracket(list(secondpass))
    # The whole result is built before the output is opened, so INPUT and
    # OUTPUT may be the same file.
    newfile = ''.join(thirdpass)
    with open(args[2], 'w', encoding='utf-8') as f:
        f.write(newfile)


if __name__ == '__main__':
    main(argv)