diff --git a/tesi/conv.py b/tesi/conv.py index 1f28a56..15f2be9 100644 --- a/tesi/conv.py +++ b/tesi/conv.py @@ -3,8 +3,8 @@ import re from sys import argv allsymbols = json.load(open('./unicode-latex.json')) -mysymbols = ['≡', '≠', '≼', '→', '←', '⊀', '⋠', '≺', '∀', '∈', 'ε','₀', '₂', '₁', '₃', 'ₐ', 'ₖ', 'ₘ', 'ₙ', 'ᵢ', 'ⁱ', '⋮', 'ₛ', 'ₜ', '≃', '⇔', '∧', '∅', 'ℕ', 'ⱼ', 'ʲ', '⊥', 'π', 'α', 'β', '∞', 'σ', '≤', '⊈', '∧', '∨', '∃', '⇒', '∩', '∉', '⋃' ] -extrasymbols = {'〚': '\llbracket', '〛': '\rrbracket'} +mysymbols = ['≡', '≠', '≼', '→', '←', '⊀', '⋠', '≺', '∀', 'ε', '₀', '₂', '₁', '₃', 'ₐ', 'ₖ', 'ₘ', 'ₙ', 'ᵢ', 'ⁱ', '⋮', 'ₛ', 'ₜ', '≃', '⇔', '∧', '∅', 'ℕ', 'ⱼ', 'ʲ', '⊥', 'π', 'α', 'β', '∞', 'σ', '≤', '⊈', '∧', '∨', '∃', '⇒', '∩', '∉', '⋃', 'ᵏ', 'ₗ', 'ˡ', 'ₒ', 'ᵣ', 'ᴵ', '≈' ] +extrasymbols = {'〚': '\llbracket', '〛': r'\rrbracket', '̸': '\neg', '¬̸': '\neg', '∈': '\in ', 'ₛ': '_S', 'ₜ': '_T'} symbols = {s: allsymbols[s] for s in mysymbols} symbols.update(extrasymbols) @@ -13,8 +13,8 @@ mathsymbols = {s: '$'+v+'$' for s, v in symbols.items()} def read_by_char(fname): # Yield character and True/False if inside mathmode block mathmode = False - mathmode_begin = set(['\\begin{equation*}', '\\begin{equation}']) - mathmode_end = set(['\\end{equation*}', '\\end{equation}']) + mathmode_begin = set(['\\begin{equation*}', '\\begin{equation}', '\[', '\\begin{mathpar}']) + mathmode_end = set(['\\end{equation*}', '\\end{equation}', '\]', '\\end{mathpar}']) cnt = 0 with open(fname, 'r') as fp: for line in fp.readlines(): @@ -50,11 +50,26 @@ def latex_errors_replacements(charlist): yield from word yield ' ' +def ll_rr_bracket(charlist): + llrr_mode = False + + for i, ch in enumerate(charlist): + if ch == '\\': + if charlist[i:i+10] == '\llbracket': + assert llrr_mode is False ; llrr_mode = True + elif charlist[i:i+10] == '\rrbracket': + assert llrr_mode is True ; llrr_mode = False + + if not (llrr_mode and ch == '$'): + yield ch + + # convert symbols except the one requiring math mode modifiers firstpass = [convert(*c) for c in read_by_char(argv[1])] # remove a latex error secondpass = latex_errors_replacements(firstpass) +thirdpass = ll_rr_bracket(list(secondpass)) -newfile = ''.join(secondpass) +newfile = ''.join(thirdpass) with open(argv[2], 'w') as f: f.write(newfile) diff --git a/tesi/files/symb_exec b/tesi/files/symb_exec new file mode 100644 index 0000000..e14d607 --- /dev/null +++ b/tesi/files/symb_exec @@ -0,0 +1 @@ +7VpLk+I2EP41rpoccFnyA/sIDJsckspjDsnuJaWxBahiW0QWC+yvj2RLfsmzDCwBdoYqCqyvpZb0dUvqlrHcWbb7kaH16hea4NSCTrKz3EcLwjAIxLcE9hXgRVEFLBlJKgg0wBP5ghXoKHRDElx0KnJKU07WXTCmeY5j3sEQY3TbrbagabfXNVpiA3iKUWqif5KEr9S0fKfBf8JkudI9A0dJMqQrK6BYoYRuW5A7t9wZo5RXT9luhlPJnealavfhBWk9MIZz/poGPz99+n3y6dcYzpztl+c/wH7ERiOl5TNKN2rCMc3WiAvswbbtH9TQ+V7zwegmT7BUCSx3ul0Rjp/WKJbSrXAAga14liqxUo4Zx7sXRw1qLoQPYZphzvaiimrgavq0/6jitjFGTfmqZQhdDyn7L2vNDUXiQbF0BGPQYMzyp8DyHw2qxKR5l4+CM/oPntGUMoHkNBc1pwuSpj0IpWSZi2IsSMICn0oKiXDKiRJkJElkN4MGaEzknMcGMOzawDdt4A2YAP5fJnBNE4zFPEV3QSoZL9Yo75gi+HcjV9k0xQvxMxEy33Ns33PXO9lMkiR2lXUlGvuBDUPYEi1ozkdFuTXJCiC0QQjCfoUFyki6r6oUKC9GBWZk0XQunpbl7+yDFdZjFQRUw1VCBUuNUo/eD7USMKBPz3rzbECvIMJ37MgZDxIRQNtzjHm2iACnczC3yrl8jQMJm3MSaEWOgsu+RtYcWmFoRfPKDVAmF4Kq7+jeBiTtZuM3vIS94OASBuNLrmHPXMN98nGeTOQJLllMUVGQuGsKvCP8L0mR7avSx5bkcafYKwt7XcjF4FuNZPFjW9Y0K0v7jglwYgQLPQOICdANi/HhI5cjtsT8K/WCYYO2LOYPGExjDKeIk8/d4Q5ZUfXwGyXljqOPXa/rL7VzaBXVNFWrdtTRV9RzPND3qIoHQ1HpVPW0T/ezwPCzjMg98aHcht1yAxAfQdCs3CnaILy98Kem72rxz3go/oHvKv4B7pUDoPAMAVDg2KEDjwiAXn22HzGIUGzDbnizUdgL8ImR1hjYILh8pFXuaKdEWm89Br1Q9Dl00hjHzIHQ1L2nN9+Va91KeiMl8CQHus5mdWDf/f5PlBO9OJJePLychTWCzkrvzSr6JieGxzlxqdS27bedS/uHw8E67blIPBgZ8WCMCv6w+VskVfJwuc2rXOhcO5cB5vX3e7mFCF55CxHd0i1E/Vbo6FuIqLdkg8veQoDBtwbuu8qaa+NdK2sG53hvcE+b72nzPW0+S1bjWCrLjayJZ9WJ8j0bfmsec8+G79nwPRt+o2GeB/2DYV4dfl8mzvO/Pas7Y6Z13Qxq3AvCo1MzqL6i8MIZlPkiV686ohccw4UcMV3ILWGFpQF3ON5wQnNd95kNrFnSYLd2RRKai+lcVySi2Pz5sDJT8w9Od/4f \ No newline at end of file diff --git a/tesi/files/symb_exec.png b/tesi/files/symb_exec.png new file mode 100644 index 0000000..365c1cd Binary files /dev/null and b/tesi/files/symb_exec.png differ diff --git a/tesi/files/symb_exec.svg b/tesi/files/symb_exec.svg new file mode 100644 index 0000000..b73b319 --- /dev/null +++ b/tesi/files/symb_exec.svg @@ -0,0 +1,3 @@ + + +
compat (...)
compat (...)
[1]
[1]
{ πα: -∞ < α < ∞ }
{ πα: -∞ < α < ∞ }
min (σ₁, σ₂)
min (σ₁, σ₂)
[2]
[2]
{ πσ: -∞ < (σ₁,σ₂) < ∞ ; πα: -∞ < α < β ; πβ: ...}
{ πσ: -∞ < (σ₁,σ₂) < ∞ ; πα: -∞ < α < β ; πβ: ...}
cast(u_int) (...)
cast(u_int) (...)
[3]
[3]
{ πσ: 0 ≤ (σ) < ∞ ; πα: -∞ < α < β ; πβ: ...}
{ πσ: 0 ≤ (σ) < ∞ ; πα: -∞ < α < β ; πβ: ...}
rest of the execution
rest of the exec...
Viewer does not support full SVG 1.1
\ No newline at end of file diff --git a/tesi/tesi.pdf b/tesi/tesi.pdf index 732adf7..aba1c5e 100644 Binary files a/tesi/tesi.pdf and b/tesi/tesi.pdf differ diff --git a/tesi/tesi_unicode.org b/tesi/tesi_unicode.org index f11e5c5..d718580 100644 --- a/tesi/tesi_unicode.org +++ b/tesi/tesi_unicode.org @@ -31,6 +31,7 @@ Magari prima pattern matching poi compilatore? #+LANGUAGE: en #+LaTeX_CLASS: article +#+LaTeX_HEADER: \linespread{1.25} #+LaTeX_HEADER: \usepackage{algorithm} #+LaTeX_HEADER: \usepackage{comment} #+LaTeX_HEADER: \usepackage{algpseudocode} @@ -41,11 +42,11 @@ Magari prima pattern matching poi compilatore? #+LaTeX_HEADER: \usepackage{listings} #+LaTeX_HEADER: \usepackage{color} #+LaTeX_HEADER: \usepackage{stmaryrd} -#+LaTeX_HEADER: \newcommand{\sem}[1]{{\llbracket{#1}\rrbracket}} -#+LaTeX_HEADER: \newcommand{\Equiv}[3]{\mathsf{equiv}(#1, #2, #3)} % \equiv is already taken -#+LaTeX_HEADER: \newcommand{\covers}[2]{#1 \mathrel{\mathsf{covers}} #2} -#+LaTeX_HEADER: \newcommand{\Yes}{\mathsf{Yes}} -#+LaTeX_HEADER: \newcommand{\No}[2]{\mathsf{No}(#1, #2)} +#+LaTeX_HEADER: \newcommand{\semTEX}[1]{{\llbracket{#1}\rrbracket}} +#+LaTeX_HEADER: \newcommand{\EquivTEX}[3]{\mathsf{equiv}(#1, #2, #3)} % \equiv is already taken +#+LaTeX_HEADER: \newcommand{\coversTEX}[2]{#1 \mathrel{\mathsf{covers}} #2} +#+LaTeX_HEADER: \newcommand{\YesTEX}{\mathsf{Yes}} +#+LaTeX_HEADER: \newcommand{\NoTEX}[2]{\mathsf{No}(#1, #2)} #+LaTeX_HEADER: \usepackage{comment} #+LaTeX_HEADER: \usepackage{mathpartir} #+LaTeX_HEADER: \usepackage{stmaryrd} % llbracket, rrbracket @@ -142,7 +143,6 @@ the case of a production compiler and to integrate with an existing code base. T black-box and proof only depends on our equivalence algorithm. \subsection{Our approach} -%% replace common TODO Our algorithm translates both source and target programs into a common representation, decision trees. Decision trees where chosen because they model the space of possible values at a given branch of @@ -227,12 +227,12 @@ matched by the source clauses. \texttt{Unreachable} is used when we statically know that no value can flow to that subtree. -We write $\sem{t_S}_S$ for the decision tree of the source program -$t_S$, computed by a matrix decomposition algorithm (each column +We write 〚tₛ〛ₛ for the decision tree of the source program +t_S, computed by a matrix decomposition algorithm (each column decomposition step gives a \texttt{Switch} node). It satisfies the following correctness statement: \[ -\forall t_S, \forall v_S, \quad t_S(v_S) = \sem{t_S}_S(v_S) +\forall t_s, \forall v_s, \quad t_s(v_s) = \semTEX{t_s}_s(v_s) \] Running any source value $v_S$ against the source program gives the same result as running it against the decision tree. @@ -250,10 +250,10 @@ nodes are never emitted. Guards result in branching. In comparison with the source decision trees, \texttt{Unreachable} nodes are never emitted. -We write $\sem{t_T}_T$ for the decision tree of the target program +We write $\semTEX{t_T}_T$ for the decision tree of the target program $t_T$, satisfying the following correctness statement: \[ -\forall t_T, \forall v_T, \quad t_T(v_T) = \sem{t_T}_T(v_T) +\forall t_T, \forall v_T, \quad t_T(v_T) = \semTEX{t_T}_T(v_T) \] \subsection{Equivalence checking} @@ -262,36 +262,36 @@ possible values \emph{S} and a pair of source and target decision trees and in case the two trees are not equivalent it returns a counter example. The algorithm respects the following correctness statement: \begin{comment} -% TODO: we have to define what \covers mean for readers to understand the specifications -% (or we use a simplifying lie by hiding \covers in the statements). +% TODO: we have to define what \coversTEX mean for readers to understand the specifications +% (or we use a simplifying lie by hiding \coversTEX in the statements). \end{comment} \begin{align*} - \Equiv S {C_S} {C_T} = \Yes \;\land\; \covers {C_T} S + \EquivTEX S {C_S} {C_T} = \YesTEX \;\land\; \coversTEX {C_T} S & \implies \forall v_S \approx v_T \in S,\; C_S(v_S) = C_T(v_T) \\ - \Equiv S {C_S} {C_T} = \No {v_S} {v_T} \;\land\; \covers {C_T} S + \EquivTEX S {C_S} {C_T} = \NoTEX {v_S} {v_T} \;\land\; \coversTEX {C_T} S & \implies v_S \approx v_T \in S \;\land\; C_S(v_S) \neq C_T(v_T) \end{align*} The algorithm proceeds by case analysis. Inference rules are shown. -If $S$ is empty the results is $\Yes$. +If $S$ is empty the results is $\YesTEX$. \begin{mathpar} \infer{ } - {\Equivrel \emptyset {C_S} {C_T} G} + {\EquivTEX \emptyset {C_S} {C_T} G} \end{mathpar} If the two decision trees are both terminal nodes the algorithm checks for content equality. \begin{mathpar} \infer{ } - {\Equivrel S \Failure \Failure \emptyqueue} + {\EquivTEX S \Failure \Failure \emptyqueue} \\ \infer {\trel {t_S} {t_T}} - {\Equivrel S {\Leaf {t_S}} {\Leaf {t_T}} \emptyqueue} + {\EquivTEX S {\Leaf {t_S}} {\Leaf {t_T}} \emptyqueue} \end{mathpar} @@ -305,10 +305,10 @@ hand side. \infer {C_S \in {\Leaf t, \Failure} \\ - \forall i,\; \Equivrel {(S \land a \in D_i)} {C_S} {C_i} G + \forall i,\; \EquivTEX {(S \land a \in D_i)} {C_S} {C_i} G \\ - \Equivrel {(S \land a \notin \Fam i {D_i})} {C_S} \Cfb G} - {\Equivrel S + \EquivTEX {(S \land a \notin \Fam i {D_i})} {C_S} \Cfb G} + {\EquivTEX S {C_S} {\Switch a {\Fam i {D_i} {C_i}} \Cfb} G} \end{mathpar} @@ -331,15 +331,15 @@ result in an empty intersection. If the accessors are different, \begin{mathpar} \infer {\forall i,\; - \Equivrel + \EquivTEX {(S \land a = K_i)} {C_i} {\trim {C_T} {a = K_i}} G \\ - \Equivrel + \EquivTEX {(S \land a \notin \Fam i {K_i})} \Cfb {\trim {C_T} {a \notin \Fam i {K_i}}} G } - {\Equivrel S + {\EquivTEX S {\Switch a {\Fam i {K_i, C_i}} \Cfb} {C_T} G} \end{mathpar} @@ -355,17 +355,17 @@ Termination of the algorithm is successful only when the guards queue is empty. \begin{mathpar} \infer - {\Equivrel S {C_0} {C_T} {G, (t_S = 0)} + {\EquivTEX S {C_0} {C_T} {G, (t_S = 0)} \\ - \Equivrel S {C_1} {C_T} {G, (t_S = 1)}} - {\Equivrel S + \EquivTEX S {C_1} {C_T} {G, (t_S = 1)}} + {\EquivTEX S {\Guard {t_S} {C_0} {C_1}} {C_T} G} \infer {\trel {t_S} {t_T} \\ - \Equivrel S {C_S} {C_b} G} - {\Equivrel S + \EquivTEX S {C_S} {C_b} G} + {\EquivTEX S {C_S} {\Guard {t_T} {C_0} {C_1}} {(t_S = b), G}} \end{mathpar} @@ -714,8 +714,8 @@ bad: The tree of the execution when the function is evaluated considering /int len/ our symbolic variable α, sa->sa_len as symbolic variable β and π as the set of constraints on a symbolic variable: - -#+BEGIN_SRC +[[./files/symb_exec.png]] +\begin{comment} [1] compat (...) { π_{α}: -∞ < α < ∞ } | [2] min (σ₁, σ₂) { π_{σ}: -∞ < (σ₁,σ₂) < ∞ ; π_{α}: -∞ < α < β ; π_{β}: ...} @@ -723,7 +723,7 @@ and π as the set of constraints on a symbolic variable: [3] cast(u_int) (...) { π_{σ}: 0 ≤ (σ) < ∞ ; π_{α}: -∞ < α < β ; π_{β}: ...} | ... // rest of the execution -#+END_SRC +\end{comment} We can see that at step 3 the set of possible values of the scrutinee α is bigger than the set of possible values of the input σ to the /cast/ directive, that is: π_{α} ⊈ π_{σ}. For this reason the /cast/ may fail when α is /len/ @@ -731,9 +731,9 @@ negative number, outside the domain π_{σ}. In C this would trigger undefined b overflow) that made the exploitation possible. Every step of evaluation can be modelled as the following transition: -#+BEGIN_SRC +\[ (π_{σ}, (πᵢ)ⁱ) → (π'_{σ}, (π'ᵢ)ⁱ) -#+END_SRC +\] if we express the π constraints as logical formulas we can model the execution of the program in terms of Hoare Logic. State of the computation is a Hoare triple {P}C{Q} where P and Q are @@ -742,50 +742,47 @@ constitute the assertions of the program. C is the directive being executed. The language of the assertions P is: #+BEGIN_SRC -P ::= true | false | a < b | P₁ ∧ P₂ | P₁ ∨ P₂ | ~P +P ::= true | false | a < b | P_{1} \wedge P_{2} | P_{1} \lor P_{2} | \notP #+END_SRC where a and b are numbers. In the Hoare rules assertions could also take the form of #+BEGIN_SRC -P ::= ∀i. P | ∃i. P | P₁ ⇒ P₂ +P ::= \forall i. P | \exists i. P | P_{1} \Rightarrow P_{2} #+END_SRC where i is a logical variable, but assertions of these kinds increases the complexity of the symbolic engine. Execution follows the rules of Hoare logic: - Empty statement : -\begin{verbatim} -———————————— -{P}/skip/{P} -\end{verbatim} +\begin{mathpar} +\infer{ } +{ \{P\} skip \{P\} } +\end{mathpar} - Assignment statement : The truthness of P[a/x] is equivalent to the truth of {P} after the assignment. -\begin{verbatim} -———————————— -{P[a/x]}x:=a{P} -\end{verbatim} +\begin{mathpar} +\infer{ } +{ \{P[a/x]\}x:=a\{P\} } +\end{mathpar} - Composition : c₁ and c₂ are directives that are executed in order; - {Q} is the /midcondition/. -\begin{verbatim} -{P}c₁{R}, {R}c₂{Q} -—————————————————— - {P}c₁;c₂{Q} -\end{verbatim} + {Q} is called the /midcondition/. +\begin{mathpar} +\infer { \{P\}c_1\{R\}, \{R\}c_2\{Q\} } + { \{P\}c₁;c₂\{Q\} } +\end{mathpar} - Conditional : -\begin{verbatim} - {P∧b}c₁{Q}, {P∧~b}c₂{Q} -———————————————————————— -{P}if b then c₁ else c₂{Q} -\end{verbatim} +\begin{mathpar} +\infer { \{P \wedge b \} c_1 \{Q\}, \{P\wedge\not b\}c_2\{Q\} } +{ \{P\}if b then c_1 else c_2\{Q\} } +\end{mathpar} - Loop : {P} is the loop invariant. After the loop is finished /P/ - holds and ~b caused the loop to end. -\begin{verbatim} - {P∧b}c{P} -———————————————————————— -{P}while b do c{P∧~b} -\end{verbatim} + holds and ¬̸b caused the loop to end. +\begin{mathpar} +\infer { \{P \wedge b \}c\{P\} } +{ \{P\} while b do c \{P \wedge \neg b\} } +\end{mathpar} Even if the semantics of symbolic execution engines are well defined, the user may run into different complications when applying such @@ -1176,22 +1173,24 @@ possible values \emph{S} and a pair of source and target decision trees and in case the two trees are not equivalent it returns a counter example. The algorithm respects the following correctness statement: -% TODO: we have to define what \covers mean for readers to understand the specifications -% (or we use a simplifying lie by hiding \covers in the statements). +\begin{comment} +TODO: we have to define what \coversTEX mean for readers to understand the specifications +(or we use a simplifying lie by hiding \coversTEX in the statements). +\end{comment} \begin{align*} - \Equiv S {C_S} {C_T} \emptyqueue = \Yes \;\land\; \covers {C_T} S + \EquivTEX S {C_S} {C_T} \emptyqueue = \YesTEX \;\land\; \coversTEX {C_T} S & \implies \forall v_S \approx v_T \in S,\; C_S(v_S) = C_T(v_T) \\ - \Equiv S {C_S} {C_T} \emptyqueue = \No {v_S} {v_T} \;\land\; \covers {C_T} S + \EquivTEX S {C_S} {C_T} \emptyqueue = \NoTEX {v_S} {v_T} \;\land\; \coversTEX {C_T} S & \implies v_S \approx v_T \in S \;\land\; C_S(v_S) \neq C_T(v_T) \end{align*} -Our equivalence-checking algorithm $\Equiv S {C_S} {C_T} G$ is +Our equivalence-checking algorithm $\EquivTEX S {C_S} {C_T} G$ is a exactly decision procedure for the provability of the judgment -$(\Equivrel S {C_S} {C_T} G)$, defined below. +$(\EquivTEX S {C_S} {C_T} G)$, defined below. \begin{mathpar} \begin{array}{l@{~}r@{~}l} & & \text{\emph{constraint trees}} \\ @@ -1214,26 +1213,26 @@ $(\Equivrel S {C_S} {C_T} G)$, defined below. \end{array} \\ \infer{ } - {\Equivrel \emptyset {C_S} {C_T} G} + {\EquivTEX \emptyset {C_S} {C_T} G} \infer{ } - {\Equivrel S \Failure \Failure \emptyqueue} + {\EquivTEX S \Failure \Failure \emptyqueue} \infer {\trel {t_S} {t_T}} - {\Equivrel S {\Leaf {t_S}} {\Leaf {t_T}} \emptyqueue} + {\EquivTEX S {\Leaf {t_S}} {\Leaf {t_T}} \emptyqueue} \infer {\forall i,\; - \Equivrel + \EquivTEX {(S \land a = K_i)} {C_i} {\trim {C_T} {a = K_i}} G \\ - \Equivrel + \EquivTEX {(S \land a \notin \Fam i {K_i})} \Cfb {\trim {C_T} {a \notin \Fam i {K_i}}} G } - {\Equivrel S + {\EquivTEX S {\Switch a {\Fam i {K_i, C_i}} \Cfb} {C_T} G} \begin{comment} @@ -1244,30 +1243,30 @@ $(\Equivrel S {C_S} {C_T} G)$, defined below. \infer {C_S \in {\Leaf t, \Failure} \\ - \forall i,\; \Equivrel {(S \land a \in D_i)} {C_S} {C_i} G + \forall i,\; \EquivTEX {(S \land a \in D_i)} {C_S} {C_i} G \\ - \Equivrel {(S \land a \notin \Fam i {D_i})} {C_S} \Cfb G} - {\Equivrel S + \EquivTEX {(S \land a \notin \Fam i {D_i})} {C_S} \Cfb G} + {\EquivTEX S {C_S} {\Switch a {\Fam i {D_i} {C_i}} \Cfb} G} \infer - {\Equivrel S {C_0} {C_T} {G, (t_S = 0)} + {\EquivTEX S {C_0} {C_T} {G, (t_S = 0)} \\ - \Equivrel S {C_1} {C_T} {G, (t_S = 1)}} - {\Equivrel S + \EquivTEX S {C_1} {C_T} {G, (t_S = 1)}} + {\EquivTEX S {\Guard {t_S} {C_0} {C_1}} {C_T} G} \infer {\trel {t_S} {t_T} \\ - \Equivrel S {C_S} {C_b} G} - {\Equivrel S + \EquivTEX S {C_S} {C_b} G} + {\EquivTEX S {C_S} {\Guard {t_T} {C_0} {C_1}} {(t_S = b), G}} \end{mathpar} * Correctness of the algorithm Running a program tₛ or its translation 〚tₛ〛 against an input vₛ -produces as a result a result /r/ in the following way: +produces as a result /r/ in the following way: | ( 〚tₛ〛ₛ(vₛ) = Cₛ(vₛ) ) → r | tₛ(vₛ) → r Likewise @@ -1342,6 +1341,7 @@ and an optional fallback. Failure is emitted only when the patterns don't cover the whole set of possible input values /S/. The fallback is not needed when the user doesn't use a wildcard pattern. +%%% Give example of thing | Cₛ ::= Leaf bb | Node(a, (Kᵢ → Cᵢ)^{i∈S} , C?) | a ::= Here | n.a @@ -1352,17 +1352,17 @@ Are K and Here clear here? \end{comment} We define the decomposition matrix /mₛ/ as -| SMatrix mₛ := (aⱼ)^{j∈J}, ((pᵢⱼ)^{j∈J} → bbᵢ)^{i∈I} +| SMatrix mₛ := (aⱼ)^{j∈J}, ((p_{ij})^{j∈J} → bbᵢ)^{i∈I} \begin{comment} Correggi prendendo in considerazione l'accessor \end{comment} We define the decision tree of source programs - [|tₛ|] +〚tₛ〛 in terms of the decision tree of pattern matrices - [|mₛ|] + 〚mₛ〛 by the following: - 〚((pᵢ → bbᵢ)^{i∈I}〛 := 〚(Root), (pᵢ → bbᵢ)^{i∈I} 〛 # i ∈ I + 〚((pᵢ → bbᵢ)^{i∈I}〛 := 〚(Root), (pᵢ → bbᵢ)^{i∈I} 〛 decision tree computed from pattern matrices respect the following invariant: | ∀v (vᵢ)^{i∈I} = v(aᵢ)^{i∈I} → 〚m〛(v) = m(vᵢ)^{i∈I} for m = ((aᵢ)^{i∈I}, (rᵢ)^{i∈I}) @@ -1385,10 +1385,95 @@ Base cases: 2. [| (aⱼ)ʲ, ∅ |] := Failure Regarding non base cases: -We define -| let Idx(k) = [0; arity(k)[ -| let First(∅) = ⊥ -| let First((aⱼ)ʲ) = a_{min(j)} ## where j ∈ J ≠ ∅ +Let's first define +| let Idx(k) := [0; arity(k)[ +| let First(∅) := ⊥ +| let First((aⱼ)ʲ) := a_{min(j∈J≠∅)} +\[ +m := ((a_i)^i ((p_{ij})^i \to e_j)^{ij}) +\] +\[ + (k_k)^k := headconstructor(p_{i0})^i +\] +\begin{equation} +\begin{align} + Groups(m) := ( k_k \to ((a)_{0.l})^{l \in Idx(k_k)} +++ (a_i)^{i \in I\backslash \{0\} }), \\ + ( if p_{0j} is k(q_l) then \\ + (qₗ)^{l \in Idx(k_k)} +++ (p_{ij})^{i \in I\backslash \{0\}} \to e_j \\ + if p_{0j} is \_ then \\ + (\_)^{l \in Idx(k_k)} +++ (p_{ij})^{i \in I\backslash \{0\}} \to e_j \\ + else \bot )^j ), \\ + ((a_i)^{i \in I\backslash \{0\}}, ((p_{ij})^{i \in I\backslash \{0\}} \to eⱼ if p_{0j} is \_ else \bot)^{j \in J}) +\end{align} +\end{equation} + +Groups(m) is an auxiliary function that decomposes a matrix m into +submatrices, according to the head constructor of their first pattern. +Groups(m) returns one submatrix m_r for each head constructor k that +occurs on the first row of m, plus one "wildcard submatrix" m_{wild} +that matches on all values that do not start with one of those head +constructors. + +Intuitively, m is equivalent to its decompositionin the following +sense: if the first pattern of an input vector (v_i)^i starts with one +of the head constructors k, then running (v_i)^i against m is the same +as running it against the submatrix m_k; otherwise (its head +constructor is none of the k) it is equivalent to running it against +the wildcard submatrix. + +We formalize this intuition as follows: +Lemma (Groups): +Let \[m\] be a matrix with \[Groups(m) = (k_r \to m_r)^k, m_{wild}\]. +For any value vector \[(v_i)^l\] such that \[v_0 = k(v'_l)^l\] for some +constructor k, +we have: +\[ + if k = kₖ for some k then + m(vᵢ)ⁱ = mₖ((v'ₗ)ˡ +++ (vᵢ)^{i∈I\backslash \{0\}}) + else + m(vᵢ)ⁱ = m_{wild}(vᵢ)^{i∈I\backslash \{0\}} +\] + +*** Proof: +Let \[m\] be a matrix with \[Group(m) = (k_r \to m_r)^k, m_{wild}\]. +Let \[(v_i)^i\] be an input matrix with \[v_0 = k(v'_l)^l\] for some k. +We proceed by case analysis: +- either k is one of the kₖ for some k +- or k is none of the (kₖ)ᵏ + +Both m(vᵢ)ⁱ and mₖ(vₖ)ᵏ are defined as the first matching result of +a family over each row rⱼ of a matrix + +We know, from the definition of +Groups(m), that mₖ is +\[ + ((a){0.l})^{l∈Idx(kₖ)} +++ (aᵢ)^{i∈I\backslash \{0\}}), + ( + if p_{0j} is k(qₗ) then + (qₗ)ˡ +++ (p_{ij})^{i∈I\backslash \{0\}} → eⱼ + if p_{0j} is _ then + (_)ˡ +++ (p_{ij})^{i∈I\backslash \{0\}} → eⱼ + else ⊥ + )ʲ +\] + +By definition, m(vᵢ)ⁱ is +m(vᵢ)ⁱ = First(rⱼ(vᵢ)ⁱ)ʲ for m = ((aᵢ)ⁱ, (rⱼ)ʲ) +(pᵢ)ⁱ (vᵢ)ⁱ = { + if p₀ = k(qₗ)ˡ, v₀ = k'(v'ₖ)ᵏ, k=Idx(k') and l=Idx(k) + if k ≠ k' then ⊥ + if k = k' then ((qₗ)ˡ +++ (pᵢ)^{i∈I\backslash \{0\}}) ((v'ₖ)ᵏ +++ (vᵢ)^{i∈I\backslash \{0\}}) + if p₀ = (q₁|q₂) then + First( (q₁pᵢ^{i∈I \backslash \{0\}}) vᵢ^{i∈I \backslash \{0\}}, (q₂pᵢ^{i∈I \backslash \{0\}}) vᵢ^{i∈I \backslash \{0\}} ) +} + +For this reason, if we can prova that +| ∀j, rⱼ(vᵢ)ⁱ = r'ⱼ((v'ₖ)ᵏ ++ (vᵢ)ⁱ) +it follows that +| m(vᵢ)ⁱ = mₖ((v'ₖ)ᵏ ++ (vᵢ)ⁱ) +from the above definition. + +We can also show that aᵢ = a){0.l}ˡ +++ a_{i∈I\backslash \{0\}} because v(a₀) = K(v(a){0.l})ˡ) @@ -1426,7 +1511,10 @@ an accessor → π relation (In other words???) Covering lemma: ∀a,π covers(Cₜ,S) => covers(C_{t/a→π}, (S∩a→π)) - Uᵢπⁱ ≈ Uᵢπ'∩(a→π) ≈ (Uᵢπ')∩(a→π) %% # TODO swap π and π' + Uᵢπⁱ ≈ Uᵢπ'∩(a→π) ≈ (Uᵢπ')∩(a→π) %% +\begin{comment} +Should I swap π and π' +\end{comment} \subsubsection Proof of equivalence checking The equivalence checking algorithm takes as parameters an input space @@ -1462,12 +1550,16 @@ TODO: explain: \end{comment} We proceed by case analysis: -1. equiv(∅, Cₛ, Cₜ) := Yes \begin{comment} -Devo spiegarlo? +I start numbering from zero to leave the numbers as they were on the blackboard, were we skipped some things +I think the unreachable case should go at the end. \end{comment} -In the other subcases S is always non-empty. 0. in case of unreachable: Cₛ(vₛ) = Absurd(Unreachable) ≠ Cₜ(vₜ) ∀vₛ,vₜ +1. In the case of an empty input space +| equiv(∅, Cₛ, Cₜ) := Yes +and that is trivial to prove because there is no pair of values (vₛ, vₜ) that could be +tested against the decision trees. +In the other subcases S is always non-empty. 2. equiv(S, Failure, Failure) := Yes the statement holds because of equality between Failure nodes in the case of every possible value /v/. @@ -1491,7 +1583,7 @@ In the other subcases S is always non-empty. | equiv(Sᵢ, Cₛ, Cₜᵢ) = No(vₛ, vₜ) for some minimal k∈I 4. equiv(S, Node(a, (kᵢ → Cₛᵢ)ⁱ, C_{sf}), Cₜ) := let π' = ⋃π(kᵢ) ∀i in - Forall(equiv( S∩(a→π(kᵢ)ⁱ), Cₛᵢ, C_{t/a→π(kᵢ)})ⁱ +++ equiv(S∩(a→π(kᵢ)), Cₛ, C_{/a¬̸π'})) + Forall(equiv( S∩(a→π(kᵢ)ⁱ), Cₛᵢ, C_{t/a→π(kᵢ)})ⁱ +++ equiv(S∩(a→π(kᵢ)), Cₛ, C_{/a̸¬̸π'})) The statement holds because: a. Forall(equiv( S∩(a→π(kᵢ)ⁱ), Cₛᵢ, C_{t/a→π(kᵢ)})ⁱ = Yes In the yes case let's reason by case analysis: @@ -1503,9 +1595,10 @@ In the other subcases S is always non-empty. Putting all together: Cₛ(vₛ) = Cₛᵢ(vₛ) = C_{t/a→πᵢ}(vₜ) = Cₜ(vₜ) - ii. or k∉(kᵢ)ⁱ ??? + ii. when k∉(kᵢ)ⁱ ??? b. Forall(...) = No(vₛ, vₜ) for a minimum k, equiv(Sₖ, Cₛₖ, C_{t/a→πₖ} = No(vₛ, vₜ) then Cₛₖ(vₛ) ≠ C_{t/a→πₖ}(vₜ) and C_{t/a→πₖ}(vₜ) = Cₜ(vt) - => (Cₛₖ(vₛ) = Cₛ(vₛ)) ≠ Cₜ(vₜ) # Same for fallback? + => (Cₛₖ(vₛ) = Cₛ(vₛ)) ≠ Cₜ(vₜ) + # Same for fallback? diff --git a/tesi/tesi_unicode.pdf b/tesi/tesi_unicode.pdf new file mode 100644 index 0000000..6abaa16 Binary files /dev/null and b/tesi/tesi_unicode.pdf differ