UniTO/tesi/conv.py

import json
import re
from sys import argv

# Load the Unicode -> LaTeX lookup table.  The table is looked for in the
# current directory first and in the parent directory as a fallback.
# Only I/O failures (missing/unreadable file) trigger the fallback: a
# malformed JSON file is reported instead of being silently skipped.
# The files are read as UTF-8 (the JSON interchange encoding) so the
# Unicode keys do not depend on the platform's locale.
try:
    with open('./unicode-latex.json', encoding='utf-8') as _symbols_file:
        allsymbols = json.load(_symbols_file)
except OSError:
    with open('../unicode-latex.json', encoding='utf-8') as _symbols_file:
        allsymbols = json.load(_symbols_file)
    
# Unicode symbols whose LaTeX translation is taken from `allsymbols`.
# (Duplicates are harmless: they map to the same dictionary key.)
mysymbols = [
    '≡', '≠', '≼', '→', '←', '⊀', '⋠', '≺', '∀', 'ε',
    '₀', '₂', '₁', '₃', '₄', 'ₐ', 'ₖ', 'ᵥ', 'ₘ', 'ₙ',
    'ᵢ', 'ⁱ', '⋮', 'ₛ', 'ₜ', '≃', '⇔', '∧', '∅', 'ℕ',
    'ⱼ', 'ʲ', '⊥', 'π', 'α', 'β', '∞', 'σ', '≤', '⊈',
    '∧', '∨', '∃', '⇒', '∩', '∉', '⋃', 'ᵏ', 'ₗ', 'ˡ',
    'ₒ', 'ᵣ', 'ᴵ', '≈', '⊆', '↦', 'Θ', 'ρ', '⇒', '∑',
    '⊧',
]

# Hand-written translations; they are applied after (and therefore
# override) the entries taken from `allsymbols`, e.g. 'ₛ' and 'ₜ'.
# Raw strings are required here: in a normal string literal '\neg' starts
# with a newline character and '\rrbracket' with a carriage return.
# NOTE(review): '¬̸' is a two-character key; the conversion looks symbols up
# one character at a time, so this entry presumably never matches.
extrasymbols = {
    '〚': r'\llbracket',
    '〛': r'\rrbracket',
    '̸': r'\neg',
    '¬̸': r'\neg',
    '∈': r'\in ',
    'ₛ': '_S',
    'ₜ': '_T',
}

# Translation used inside math mode: the bare LaTeX command.
symbols = {s: allsymbols[s] for s in mysymbols}
symbols.update(extrasymbols)
# Translation used outside math mode: the same command wrapped in $...$.
mathsymbols = {s: '$' + v + '$' for s, v in symbols.items()}

def read_by_char(fname):
    """Yield ``(character, mathmode)`` for every character of *fname*.

    Math mode is tracked line by line.  A line is split on single spaces
    and each piece is stripped; if one of the pieces is exactly an opening
    marker (``\\begin{equation*}``, ``\\begin{equation}``, ``\\[``,
    ``\\begin{mathpar}``) math mode is switched on, and if one is exactly
    the matching closing marker it is switched off.  The flag is updated
    before the characters of the line are yielded, so every character of
    an opening line is reported with ``True`` and every character of a
    closing line with ``False``.

    The file is read as UTF-8 so that the Unicode symbols are decoded the
    same way regardless of the platform's locale.

    Raises:
        AssertionError: if a block is opened while one is already open, or
            closed while none is open.  Raised explicitly so the check is
            still performed when Python runs with ``-O``.
    """
    mathmode_begin = {'\\begin{equation*}', '\\begin{equation}', '\\[', '\\begin{mathpar}'}
    mathmode_end = {'\\end{equation*}', '\\end{equation}', '\\]', '\\end{mathpar}'}
    mathmode = False
    with open(fname, 'r', encoding='utf-8') as fp:
        # Iterate the file lazily instead of loading every line up front.
        for cnt, line in enumerate(fp, start=1):
            words = [w.strip() for w in line.split(' ')]

            if mathmode_begin.intersection(words):
                if mathmode:
                    raise AssertionError(words)
                mathmode = True
            if mathmode_end.intersection(words):
                if not mathmode:
                    raise AssertionError(f'Line: {words}, number: {cnt}')
                mathmode = False

            for ch in line:
                yield ch, mathmode

def convert(ch, mathmode):
    """Translate one character to LaTeX, leaving unknown characters as is.

    Inside math mode the bare command from ``symbols`` is used; outside
    math mode the ``$``-wrapped variant from ``mathsymbols`` is used.
    """
    table = symbols if mathmode else mathsymbols
    return table.get(ch, ch)

def latex_errors_replacements(charlist):
    """Patch known-bad LaTeX fragments and yield the text character by character.

    The pieces of *charlist* are joined into one string which is split on
    single spaces.  A space-delimited chunk that is exactly equal to a key
    of the replacement table is swapped for the corresponding value; every
    other chunk is passed through unchanged.

    Spaces are emitted only *between* chunks, so text without a matching
    chunk is reproduced exactly (no trailing space is appended, and empty
    input yields nothing).

    Args:
        charlist: iterable of strings (typically single characters).

    Yields:
        The characters of the patched text, one at a time.
    """
    replacements = {
        # Drops two stray \end{enumerate} lines before the "Symbolic..."
        # subsection heading.
        '\n\\end{comment}\n\\end{enumerate}\n\\end{enumerate}\n\n\\subsection{Symbolic':
            '\n\\end{comment}\n\n\\subsection{Symbolic',
    }
    first = True
    for word in ''.join(charlist).split(' '):
        if not first:
            # Restore the separator that split(' ') consumed.
            yield ' '
        first = False
        yield from replacements.get(word, word)

def ll_rr_bracket(charlist):
    """Drop every ``$`` found between ``\\llbracket`` and ``\\rrbracket``.

    The pieces of *charlist* are joined into one string, which is then
    scanned character by character.  From the backslash of a
    ``\\llbracket`` up to (not including) the backslash of the next
    ``\\rrbracket`` every ``$`` is skipped; all other characters are
    yielded unchanged.

    The markers are matched on the joined text, so the function works both
    for a string and for a list of characters (comparing a list slice with
    a string is never true), and the closing marker is spelled with an
    escaped backslash (``'\\rrbracket'`` written as ``'\rrbracket'`` would
    start with a carriage return).

    Args:
        charlist: a string or an iterable of strings.

    Yields:
        Single characters of the filtered text.

    Raises:
        AssertionError: on a ``\\llbracket`` inside an open bracket or a
            ``\\rrbracket`` without a preceding ``\\llbracket``.  Raised
            explicitly so the check also runs under ``python -O``.
    """
    opening = '\\llbracket'
    closing = '\\rrbracket'
    text = ''.join(charlist)
    llrr_mode = False

    for i, ch in enumerate(text):
        if ch == '\\':
            if text.startswith(opening, i):
                if llrr_mode:
                    raise AssertionError(f'nested {opening} at offset {i}')
                llrr_mode = True
            elif text.startswith(closing, i):
                if not llrr_mode:
                    raise AssertionError(f'unmatched {closing} at offset {i}')
                llrr_mode = False

        if not (llrr_mode and ch == '$'):
            yield ch

def include_files(text):
    """Expand ``;; include\\_file NAME`` directives into Verbatim listings.

    *text* is split on newlines.  A line containing the directive is
    replaced by three entries: a ``\\begin{Verbatim}[...]`` header labelled
    with the file name, the full content of ``traces/NAME`` and a closing
    ``\\end{Verbatim}``.  Every other line is kept unchanged.

    The file name is whatever follows the directive on the line, with
    surrounding whitespace removed; anything preceding the directive on
    the same line is discarded.  Trace files are read as UTF-8.

    Args:
        text: the document as a single string.

    Returns:
        list[str]: the resulting pieces, meant to be joined with newlines.

    Raises:
        AssertionError: if *text* is not a string.
        OSError: if a referenced trace file cannot be opened.
    """
    def put_header(key):
        # Raw string: every backslash below is a literal LaTeX backslash.
        header = r'''\begin{Verbatim}[fontsize=\footnotesize,
 frame=lines,  % top and bottom rule only
 framesep=2em, % separation between frame and text
 rulecolor=\color{Gray},
 label=\fbox{\color{Black}REPLACEME2},
 labelposition=topline,
]'''
        return header.replace('REPLACEME2', key)

    if not isinstance(text, str):
        raise AssertionError(f'expected str, got {type(text).__name__}')

    # The underscore is LaTeX-escaped in the text this function receives.
    key = ';; include\\_file '
    result = []
    for line in text.split('\n'):
        # Take the name from where the directive actually is, not from a
        # fixed offset that is only right when the line starts with it.
        _, found, fname = line.partition(key)
        if not found:
            result.append(line)
            continue

        fname = fname.strip()
        result.append(put_header(fname))
        with open(f"traces/{fname}", 'r', encoding='utf-8') as f:
            result.append(f.read())
        result.append('\\end{Verbatim}')

    return result

# Usage: argv[1] is the input file, argv[2] the output file.
# Pass 1: translate each character, wrapping the translation in $...$ when
# the character is outside a math-mode block.
firstpass = [convert(*c) for c in read_by_char(argv[1])]
# Pass 2: patch a known LaTeX error in the source text.
secondpass = latex_errors_replacements(firstpass)
# Pass 3: post-process the \llbracket ... \rrbracket spans (expects a
# sequence of characters, hence the list()).
thirdpass = ll_rr_bracket(list(secondpass))
# Pass 4: expand the file-inclusion directives; returns a list of lines.
fourthpass = include_files(''.join(thirdpass))

newfile = '\n'.join(fourthpass)
# Write UTF-8 explicitly: the text may still contain untranslated Unicode
# characters, which a locale-dependent default encoding may fail to encode.
with open(argv[2], 'w', encoding='utf-8') as f:
    f.write(newfile)
-												unicode latex

											
										
										
											2020-02-21 19:13:13 +01:00
+								import json
-												script conversione

											
										
										
											2020-02-24 14:36:26 +01:00
+								import re
-												unicode latex

											
										
										
											2020-02-21 19:13:13 +01:00
+								from sys import argv
-												mail gabriel e coppo

											
										
										
											2020-04-11 00:26:05 +02:00
+								try:
 								    allsymbols = json.load(open('./unicode-latex.json'))
 								except:
 								    allsymbols = json.load(open('../unicode-latex.json'))
-												tesi

											
										
										
											2020-06-29 17:44:43 +02:00
+								mysymbols = ['≡', '≠', '≼', '→', '←', '⊀', '⋠', '≺', '∀',  'ε', '₀', '₂', '₁', '₃', '₄', 'ₐ', 'ₖ', 'ᵥ', 'ₘ', 'ₙ', 'ᵢ', 'ⁱ', '⋮', 'ₛ', 'ₜ', '≃', '⇔', '∧', '∅', 'ℕ', 'ⱼ', 'ʲ', '⊥', 'π', 'α', 'β', '∞', 'σ', '≤', '⊈', '∧', '∨', '∃', '⇒', '∩', '∉', '⋃', 'ᵏ', 'ₗ', 'ˡ', 'ₒ', 'ᵣ', 'ᴵ', '≈', '⊆', '↦', 'Θ', 'ρ', '⇒', '∑', '⊧' ]
-												sono un cretino

											
										
										
											2020-06-04 23:48:32 +02:00
+								extrasymbols = {'〚': '\llbracket', '〛': r'\rrbracket', '̸': '\neg', '¬̸': '\neg',
 								                '∈': '\in ', 'ₛ': '_S', 'ₜ': '_T'}
-												unicode latex

											
										
										
											2020-02-21 19:13:13 +01:00
-												script conversione

											
										
										
											2020-02-24 14:36:26 +01:00
+								symbols = {s: allsymbols[s] for s in mysymbols}
-												latex symbols and python

											
										
										
											2020-04-02 14:14:39 +02:00
+								symbols.update(extrasymbols)
 								mathsymbols = {s: '$'+v+'$' for s, v in symbols.items()}
-												unicode latex

											
										
										
											2020-02-21 19:13:13 +01:00
 								def read_by_char(fname):
-												script conversione

											
										
										
											2020-02-24 14:36:26 +01:00
+								    # Yield character and True/False if inside mathmode block
 								    mathmode = False
-												more latex

											
										
										
											2020-04-07 21:05:08 +02:00
+								    mathmode_begin = set(['\\begin{equation*}', '\\begin{equation}', '\[', '\\begin{mathpar}'])
 								    mathmode_end = set(['\\end{equation*}', '\\end{equation}', '\]', '\\end{mathpar}'])
-												script conversione

											
										
										
											2020-02-24 14:36:26 +01:00
+								    cnt = 0
-												unicode latex

											
										
										
											2020-02-21 19:13:13 +01:00
+								    with open(fname, 'r') as fp:
 								        for line in fp.readlines():
-												script conversione

											
										
										
											2020-02-24 14:36:26 +01:00
+								            cnt += 1
 								            words = [w.strip() for w in line.split(' ')]
-												prima versione per coppo

											
										
										
											2020-02-24 19:46:00 +01:00
-												script conversione

											
										
										
											2020-02-24 14:36:26 +01:00
+								            if mathmode_begin.intersection(words):
-												sono un cretino

											
										
										
											2020-06-04 23:48:32 +02:00
+								                assert mathmode == False, words
-												script conversione

											
										
										
											2020-02-24 14:36:26 +01:00
+								                mathmode = True
-												prima versione per coppo

											
										
										
											2020-02-24 19:46:00 +01:00
+								            if mathmode_end.intersection(words):
-												script conversione

											
										
										
											2020-02-24 14:36:26 +01:00
+								                assert mathmode == True, f'Line: {words}, number: {cnt}'
 								                mathmode = False
-												unicode latex

											
										
										
											2020-02-21 19:13:13 +01:00
+								            for ch in line:
-												script conversione

											
										
										
											2020-02-24 14:36:26 +01:00
+								                yield ch, mathmode
 								def convert(ch, mathmode):
 								    if not mathmode:
 								        return mathsymbols[ch] if ch in mathsymbols else ch
 								    else:
 								        return symbols[ch] if ch in symbols else ch
-												unicode latex

											
										
										
											2020-02-21 19:13:13 +01:00
-												uff

											
										
										
											2020-03-02 14:46:37 +01:00
+								def latex_errors_replacements(charlist):
 								    text = ''.join(charlist).split(' ')
-												mail gabriel e coppo

											
										
										
											2020-04-11 00:26:05 +02:00
+								    replacements = {
 								        '\n\end{comment}\n\end{enumerate}\n\end{enumerate}\n\n\subsection{Symbolic': '\n\end{comment}\n\n\subsection{Symbolic',
 								    }
-												uff

											
										
										
											2020-03-02 14:46:37 +01:00
+								    r_set = set(replacements.keys())
 								    for word in text:
 								        it = r_set.intersection(set([word]))
 								        if it:
 								            yield from replacements[it.pop()]
 								        else:
 								            yield from word
 								        yield ' '
-												more latex

											
										
										
											2020-04-07 21:05:08 +02:00
+								def ll_rr_bracket(charlist):
 								    llrr_mode = False
 								    for i, ch in enumerate(charlist):
 								        if ch == '\\':
 								            if charlist[i:i+10] == '\llbracket':
 								                assert llrr_mode is False ; llrr_mode = True
 								            elif charlist[i:i+10] == '\rrbracket':
 								                assert llrr_mode is True ; llrr_mode = False
 								        if not (llrr_mode and ch == '$'):
 								            yield ch
-												traces

											
										
										
											2020-06-30 14:07:10 +02:00
+								def include_files(text):
 								    def put_header(key):
 								        text = '''\\begin{Verbatim}[fontsize=\\footnotesize,
 								 frame=lines,  % top and bottom rule only
 								 framesep=2em, % separation between frame and text
 								 rulecolor=\color{Gray},
 								 label=\\fbox{\color{Black}REPLACEME2},
 								 labelposition=topline,
 								]'''.replace('REPLACEME2', key)
 								        return text
 								    assert type(text) is str
 								    result = []
 								    text = text.split('\n')
 								    cnt = 0
 								    key = ';; include\_file '
 								    for line in text:
 								        if key in line:
 								            cnt+=1
 								            file = line[len(key):]
 								            source = f"traces/{file}"
 								            result.append(put_header(file))
 								            with open(source, 'r') as f:
 								                result.append(f.read())
 								            result.append('\end{Verbatim}')
 								        else:
 								            result.append(line)
 								    i = 45
 								    return result
-												more latex

											
										
										
											2020-04-07 21:05:08 +02:00
-												script conversione

											
										
										
											2020-02-24 14:36:26 +01:00
+								# convert symbols except the one requiring math mode modifiers
-												script conversione

											
										
										
											2020-02-24 14:37:50 +01:00
+								firstpass = [convert(*c) for c in read_by_char(argv[1])]
-												uff

											
										
										
											2020-03-02 14:46:37 +01:00
+								# remove a latex error
 								secondpass = latex_errors_replacements(firstpass)
-												more latex

											
										
										
											2020-04-07 21:05:08 +02:00
+								thirdpass = ll_rr_bracket(list(secondpass))
-												traces

											
										
										
											2020-06-30 14:07:10 +02:00
+								fourthpass = include_files(''.join(thirdpass))
-												unicode latex

											
										
										
											2020-02-21 19:13:13 +01:00
-												traces

											
										
										
											2020-06-30 14:07:10 +02:00
+								newfile = '\n'.join(fourthpass)
-												unicode latex

											
										
										
											2020-02-21 19:13:13 +01:00
+								with open(argv[2], 'w') as f:
-												script conversione

											
										
										
											2020-02-24 14:36:26 +01:00
+								    f.write(newfile)