diff --git a/tesi/.gitignore b/tesi/.gitignore index 2ea43bc..bbd865c 100644 --- a/tesi/.gitignore +++ b/tesi/.gitignore @@ -2,6 +2,7 @@ tesi.tex .#tesi.tex tesi_unicode.tex +part4_unicode.tex ## Core latex/pdflatex auxiliary files: diff --git a/tesi/conv.py b/tesi/conv.py index 15f2be9..7f79a5e 100644 --- a/tesi/conv.py +++ b/tesi/conv.py @@ -2,8 +2,12 @@ import json import re from sys import argv -allsymbols = json.load(open('./unicode-latex.json')) -mysymbols = ['≡', '≠', '≼', '→', '←', '⊀', '⋠', '≺', '∀', 'ε', '₀', '₂', '₁', '₃', 'ₐ', 'ₖ', 'ₘ', 'ₙ', 'ᵢ', 'ⁱ', '⋮', 'ₛ', 'ₜ', '≃', '⇔', '∧', '∅', 'ℕ', 'ⱼ', 'ʲ', '⊥', 'π', 'α', 'β', '∞', 'σ', '≤', '⊈', '∧', '∨', '∃', '⇒', '∩', '∉', '⋃', 'ᵏ', 'ₗ', 'ˡ', 'ₒ', 'ᵣ', 'ᴵ', '≈' ] +try: + allsymbols = json.load(open('./unicode-latex.json')) +except: + allsymbols = json.load(open('../unicode-latex.json')) + +mysymbols = ['≡', '≠', '≼', '→', '←', '⊀', '⋠', '≺', '∀', 'ε', '₀', '₂', '₁', '₃', 'ₐ', 'ₖ', 'ₘ', 'ₙ', 'ᵢ', 'ⁱ', '⋮', 'ₛ', 'ₜ', '≃', '⇔', '∧', '∅', 'ℕ', 'ⱼ', 'ʲ', '⊥', 'π', 'α', 'β', '∞', 'σ', '≤', '⊈', '∧', '∨', '∃', '⇒', '∩', '∉', '⋃', 'ᵏ', 'ₗ', 'ˡ', 'ₒ', 'ᵣ', 'ᴵ', '≈', '⊆' ] extrasymbols = {'〚': '\llbracket', '〛': r'\rrbracket', '̸': '\neg', '¬̸': '\neg', '∈': '\in ', 'ₛ': '_S', 'ₜ': '_T'} symbols = {s: allsymbols[s] for s in mysymbols} @@ -39,8 +43,9 @@ def convert(ch, mathmode): def latex_errors_replacements(charlist): text = ''.join(charlist).split(' ') - replacements = {'\n\end{comment}\n\end{enumerate}\n\end{enumerate}\n\n\subsection{Symbolic': - '\n\end{comment}\n\n\subsection{Symbolic'} + replacements = { + '\n\end{comment}\n\end{enumerate}\n\end{enumerate}\n\n\subsection{Symbolic': '\n\end{comment}\n\n\subsection{Symbolic', + } r_set = set(replacements.keys()) for word in text: it = r_set.intersection(set([word])) diff --git a/tesi/gabriel/Makefile b/tesi/gabriel/Makefile new file mode 100644 index 0000000..efd61b7 --- /dev/null +++ b/tesi/gabriel/Makefile @@ -0,0 +1,22 @@ +SRC = part4.tex +AUX = part4.aux +DEL = part4.aux part4.bbl part4.blg part4.log part4.out part4_unicode.tex part4.pdf part4.tex texput.log +NAME = part4 + +part4: + @echo + @echo "=== Removing temporary files ===" + rm -f $(DEL) + @echo + @echo "=== Building from scratch ===" + emacs -batch part4_unicode.org -f org-latex-export-to-latex --kill + python3 ../conv.py part4_unicode.tex part4.tex + pdflatex $(SRC) + bibtex $(AUX) + pdflatex $(SRC) + pdflatex $(SRC) + @echo + @echo "=== All done. Generated $(NAME).pdf ===" + +clean: + rm -f $(DEL) diff --git a/tesi/gabriel/part4.org b/tesi/gabriel/part4.org deleted file mode 100644 index 5cb1a98..0000000 --- a/tesi/gabriel/part4.org +++ /dev/null @@ -1,354 +0,0 @@ -# Add headers to export only this section -* Correctness of the algorithm -Running a program tₛ or its translation 〚tₛ〛 against an input vₛ -produces as a result /r/ in the following way: -| ( 〚tₛ〛ₛ(vₛ) = Cₛ(vₛ) ) → r -| tₛ(vₛ) → r -Likewise -| ( 〚tₜ〛ₜ(vₜ) = Cₜ(vₜ) ) → r -| tₜ(vₜ) → r -where result r ::= guard list * (Match blackbox | NoMatch | Absurd) -and guard ::= blackbox. - -Having defined equivalence between two inputs of which one is -expressed in the source language and the other in the target language - vₛ ≃ vₜ (TODO define, this talks about the representation of source values in the target) - -we can define the equivalence between a couple of programs or a couple -of decision trees -| tₛ ≃ tₜ := ∀vₛ≃vₜ, tₛ(vₛ) = tₜ(vₜ) -| Cₛ ≃ Cₜ := ∀vₛ≃vₜ, Cₛ(vₛ) = Cₜ(vₜ) - -The proposed equivalence algorithm that works on a couple of -decision trees is returns either /Yes/ or /No(vₛ, vₜ)/ where vₛ and -vₜ are a couple of possible counter examples for which the constraint -trees produce a different result. - -** Statements -Theorem. We say that a translation of a source program to a decision tree -is correct when for every possible input, the source program and its -respective decision tree produces the same result - -| ∀vₛ, tₛ(vₛ) = 〚tₛ〛ₛ(vₛ) - - -Likewise, for the target language: - -| ∀vₜ, tₜ(vₜ) = 〚tₜ〛ₜ(vₜ) - -Definition: in the presence of guards we can say that two results are -equivalent modulo the guards queue, written /r₁ ≃gs r₂/, when: -| (gs₁, r₁) ≃gs (gs₂, r₂) ⇔ (gs₁, r₁) = (gs₂ ++ gs, r₂) - -Definition: we say that Cₜ covers the input space /S/, written -/covers(Cₜ, S) when every value vₛ∈S is a valid input to the -decision tree Cₜ. (TODO: rephrase) - -Theorem: Given an input space /S/ and a couple of decision trees, where -the target decision tree Cₜ covers the input space /S/, we say that -the two decision trees are equivalent when: - -| equiv(S, Cₛ, Cₜ, gs) = Yes ∧ covers(Cₜ, S) → ∀vₛ≃vₜ ∈ S, Cₛ(vₛ) ≃gs Cₜ(vₜ) - -Similarly we say that a couple of decision trees in the presence of -an input space /S/ are /not/ equivalent when: - -| equiv(S, Cₛ, Cₜ, gs) = No(vₛ,vₜ) ∧ covers(Cₜ, S) → vₛ≃vₜ ∈ S ∧ Cₛ(vₛ) ≠gs Cₜ(vₜ) - -Corollary: For a full input space /S/, that is the universe of the -target program we say: - -| equiv(S, 〚tₛ〛ₛ, 〚tₜ〛ₜ, ∅) = Yes ⇔ tₛ ≃ tₜ - - -*** Proof of the correctness of the translation from source programs to source decision trees - -We define a source term tₛ as a collection of patterns pointing to blackboxes -| tₛ ::= (p → bb)^{i∈I} - -A pattern is defined as either a constructor pattern, an or pattern or -a constant pattern -| p ::= | K(pᵢ)ⁱ, i ∈ I | (p|q) | n ∈ ℕ - -A decision tree is defined as either a Leaf, a Failure terminal or -an intermediate node with different children sharing the same accessor /a/ -and an optional fallback. -Failure is emitted only when the patterns don't cover the whole set of -possible input values /S/. The fallback is not needed when the user -doesn't use a wildcard pattern. -%%% Give example of thing - -| Cₛ ::= Leaf bb | Node(a, (Kᵢ → Cᵢ)^{i∈S} , C?) -| a ::= Here | n.a -| vₛ ::= K(vᵢ)^{i∈I} | n ∈ ℕ - -\begin{comment} -Are K and Here clear here? -\end{comment} - -We define the decomposition matrix /mₛ/ as -| SMatrix mₛ := (aⱼ)^{j∈J}, ((p_{ij})^{j∈J} → bbᵢ)^{i∈I} -\begin{comment} -Correggi prendendo in considerazione l'accessor -\end{comment} - -We define the decision tree of source programs -〚tₛ〛 -in terms of the decision tree of pattern matrices -〚mₛ〛 -by the following: -〚((pᵢ → bbᵢ)^{i∈I}〛 := 〚(Root), (pᵢ → bbᵢ)^{i∈I} 〛 - -decision tree computed from pattern matrices respect the following invariant: -| ∀v (vᵢ)^{i∈I} = v(aᵢ)^{i∈I} → 〚m〛(v) = m(vᵢ)^{i∈I} for m = ((aᵢ)^{i∈I}, (rᵢ)^{i∈I}) -where -| v(Here) = v -| K(vᵢ)ⁱ(k.a) = vₖ(a) if k ∈ [0;n[ -\begin{comment} -TODO: EXPLAIN -\end{comment} - -We proceed to show the correctness of the invariant by a case -analysys. - -Base cases: -1. [| ∅, (∅ → bbᵢ)ⁱ |] := Leaf bbᵢ where i := min(I), that is a - decision tree [|ms|] defined by an empty accessor and empty - patterns pointing to blackboxes /bbᵢ/. This respects the invariant - because a decomposition matrix in the case of empty rows returns - the first expression and we known that (Leaf bb)(v) := Match bb -2. [| (aⱼ)ʲ, ∅ |] := Failure - -Regarding non base cases: -Let's first define -| let Idx(k) := [0; arity(k)[ -| let First(∅) := ⊥ -| let First((aⱼ)ʲ) := a_{min(j∈J≠∅)} -\[ -m := ((a_i)^i ((p_{ij})^i \to e_j)^{ij}) -\] -\[ - (k_k)^k := headconstructor(p_{i0})^i -\] -\begin{equation} - Groups(m) := ( k_k \to ((a)_{0.l})^{l \in Idx(k_k)} +++ (a_i)^{i \in I\backslash \{0\} }), \\ - ( if p_{0j} is k(q_l) then \\ - (qₗ)^{l \in Idx(k_k)} +++ (p_{ij})^{i \in I\backslash \{0\}} \to e_j \\ - if p_{0j} is \_ then \\ - (\_)^{l \in Idx(k_k)} +++ (p_{ij})^{i \in I\backslash \{0\}} \to e_j \\ - else \bot )^j ), \\ - ((a_i)^{i \in I\backslash \{0\}}, ((p_{ij})^{i \in I\backslash \{0\}} \to eⱼ if p_{0j} is \_ else \bot)^{j \in J}) -\end{equation} - -Groups(m) is an auxiliary function that decomposes a matrix m into -submatrices, according to the head constructor of their first pattern. -Groups(m) returns one submatrix m_r for each head constructor k that -occurs on the first row of m, plus one "wildcard submatrix" m_{wild} -that matches on all values that do not start with one of those head -constructors. - -Intuitively, m is equivalent to its decompositionin the following -sense: if the first pattern of an input vector (v_i)^i starts with one -of the head constructors k, then running (v_i)^i against m is the same -as running it against the submatrix m_k; otherwise (its head -constructor is none of the k) it is equivalent to running it against -the wildcard submatrix. - -We formalize this intuition as follows: -Lemma (Groups): -Let \[m\] be a matrix with \[Groups(m) = (k_r \to m_r)^k, m_{wild}\]. -For any value vector \[(v_i)^l\] such that \[v_0 = k(v'_l)^l\] for some -constructor k, -we have: -\[ - if k = kₖ for some k then - m(vᵢ)ⁱ = mₖ((v'ₗ)ˡ +++ (vᵢ)^{i∈I\backslash \{0\}}) - else - m(vᵢ)ⁱ = m_{wild}(vᵢ)^{i∈I\backslash \{0\}} -\] - -*** Proof: -Let \[m\] be a matrix with \[Group(m) = (k_r \to m_r)^k, m_{wild}\]. -Let \[(v_i)^i\] be an input matrix with \[v_0 = k(v'_l)^l\] for some k. -We proceed by case analysis: -- either k is one of the kₖ for some k -- or k is none of the (kₖ)ᵏ - -Both m(vᵢ)ⁱ and mₖ(vₖ)ᵏ are defined as the first matching result of -a family over each row rⱼ of a matrix - -We know, from the definition of -Groups(m), that mₖ is -\[ - ((a){0.l})^{l∈Idx(kₖ)} +++ (aᵢ)^{i∈I\backslash \{0\}}), - ( - if p_{0j} is k(qₗ) then - (qₗ)ˡ +++ (p_{ij})^{i∈I\backslash \{0\}} → eⱼ - if p_{0j} is _ then - (_)ˡ +++ (p_{ij})^{i∈I\backslash \{0\}} → eⱼ - else ⊥ - )ʲ -\] - -By definition, m(vᵢ)ⁱ is -m(vᵢ)ⁱ = First(rⱼ(vᵢ)ⁱ)ʲ for m = ((aᵢ)ⁱ, (rⱼ)ʲ) -(pᵢ)ⁱ (vᵢ)ⁱ = { - if p₀ = k(qₗ)ˡ, v₀ = k'(v'ₖ)ᵏ, k=Idx(k') and l=Idx(k) - if k ≠ k' then ⊥ - if k = k' then ((qₗ)ˡ +++ (pᵢ)^{i∈I\backslash \{0\}}) ((v'ₖ)ᵏ +++ (vᵢ)^{i∈I\backslash \{0\}}) - if p₀ = (q₁|q₂) then - First( (q₁pᵢ^{i∈I \backslash \{0\}}) vᵢ^{i∈I \backslash \{0\}}, (q₂pᵢ^{i∈I \backslash \{0\}}) vᵢ^{i∈I \backslash \{0\}} ) -} - -For this reason, if we can prove that -| ∀j, rⱼ(vᵢ)ⁱ = r'ⱼ((v'ₖ)ᵏ ++ (vᵢ)ⁱ) -it follows that -| m(vᵢ)ⁱ = mₖ((v'ₖ)ᵏ ++ (vᵢ)ⁱ) -from the above definition. - -We can also show that aᵢ = a_{0.l}ˡ +++ a_{i∈I\backslash \{0\}} because v(a₀) = K(v(a){0.l})ˡ) - - - - -** Proof of equivalence checking -\begin{comment} -TODO: put ^i∈I where needed -\end{comment} -\subsubsection{The trimming lemma} -The trimming lemma allows to reduce the size of a decision tree given -an accessor → π relation (TODO: expand) -| ∀vₜ ∈ (a→π), Cₜ(vₜ) = C_{t/a→π(kᵢ)}(vₜ) -We prove this by induction on Cₜ: - a. Cₜ = Leaf_{bb}: when the decision tree is a leaf terminal, we - know that - | Leaf_{bb/a→π}(v) = Leaf_{bb}(v) - That means that the result of trimming on a Leaf is the Leaf itself - b. The same applies to Failure terminal - | Failure_{/a→π}(v) = Failure(v) - c. When Cₜ = Node(b, (π→Cᵢ)ⁱ)_{/a→π} then - we look at the accessor /a/ of the subtree Cᵢ and - we define πᵢ' = πᵢ if a≠b else πᵢ∩π - Trimming a switch node yields the following result: - | Node(b, (π→Cᵢ)ⁱ)_{/a→π} := Node(b, (π'ᵢ→C_{i/a→π})ⁱ) - -\begin{comment} -Actually in the proof.org file I transcribed: - e. Unreachabe → ⊥ -This is not correct because you don't have Unreachable nodes in target decision trees -\end{comment} - -For the trimming lemma we have to prove that running the value vₜ against -the decistion tree Cₜ is the same as running vₜ against the tree -C_{trim} that is the result of the trimming operation on Cₜ -| Cₜ(vₜ) = C_{trim}(vₜ) = Node(b, (πᵢ'→C_{i/a→π})ⁱ)(vₜ) -We can reason by first noting that when vₜ∉(b→πᵢ)ⁱ the node must be a Failure node. -In the case where ∃k| vₜ∈(b→πₖ) then we can prove that -| C_{k/a→π}(vₜ) = Node(b, (πᵢ'→C_{i/a→π})ⁱ)(vₜ) -because when a ≠ b then πₖ'= πₖ and this means that vₜ∈πₖ' -while when a = b then πₖ'=(πₖ∩π) and vt∈πₖ' because: -- by the hypothesis, vₜ∈π -- we are in the case where vₜ∈πₖ -So vₜ ∈ πₖ' and by induction -| Cₖ(vₜ) = C_{k/a→π}(vₜ) -We also know that ∀vₜ∈(b→πₖ) → Cₜ(vₜ) = Cₖ(vₜ) -By putting together the last two steps, we have proven the trimming -lemma. - -\begin{comment} -TODO: what should I say about covering??? I swap π and π' -Covering lemma: - ∀a,π covers(Cₜ,S) → covers(C_{t/a→π}, (S∩a→π)) - Uᵢπⁱ ≈ Uᵢπ'∩(a→π) ≈ (Uᵢπ')∩(a→π) %% - - -%%%%%%% Also: Should I swap π and π' ? -\end{comment} - -\subsubsection{Equivalence checking} -The equivalence checking algorithm takes as parameters an input space -/S/, a source decision tree /Cₛ/ and a target decision tree /Cₜ/: -| equiv(S, Cₛ, Cₜ) → Yes | No(vₛ, vₜ) - -When the algorithm returns Yes and the input space is covered by /Cₛ/ -we can say that the couple of decision trees are the same for -every couple of source value /vₛ/ and target value /vₜ/ that are equivalent. -\begin{comment} -Define "covered" -Is it correct to say the same? How to correctly distinguish in words ≃ and = ? -\end{comment} -| equiv(S, Cₛ, Cₜ) = Yes and cover(Cₜ, S) → ∀ vₛ ≃ vₜ∈S ∧ Cₛ(vₛ) = Cₜ(vₜ) -In the case where the algorithm returns No we have at least a couple -of counter example values vₛ and vₜ for which the two decision trees -outputs a different result. -| equiv(S, Cₛ, Cₜ) = No(vₛ,vₜ) and cover(Cₜ, S) → ∀ vₛ ≃ vₜ∈S ∧ Cₛ(vₛ) ≠ Cₜ(vₜ) - -We define the following -| Forall(Yes) = Yes -| Forall(Yes::l) = Forall(l) -| Forall(No(vₛ,vₜ)::_) = No(vₛ,vₜ) -There exists and are injective: -| int(k) ∈ ℕ (arity(k) = 0) -| tag(k) ∈ ℕ (arity(k) > 0) -| π(k) = {n\vert int(k) = n} x {n\vert tag(k) = n} -where k is a constructor. - -\begin{comment} -TODO: explain: -∀v∈a→π, C_{/a→π}(v) = C(v) -\end{comment} - -We proceed by case analysis: -\begin{comment} -I start numbering from zero to leave the numbers as they were on the blackboard, were we skipped some things -I think the unreachable case should go at the end. -\end{comment} -0. in case of unreachable: -| Cₛ(vₛ) = Absurd(Unreachable) ≠ Cₜ(vₜ) ∀vₛ,vₜ -1. In the case of an empty input space - | equiv(∅, Cₛ, Cₜ) := Yes - and that is trivial to prove because there is no pair of values (vₛ, vₜ) that could be - tested against the decision trees. - In the other subcases S is always non-empty. -2. When there are /Failure/ nodes at both sides the result is /Yes/: - |equiv(S, Failure, Failure) := Yes - Given that ∀v, Failure(v) = Failure, the statement holds. -3. When we have a Leaf or a Failure at the left side: - | equiv(S, Failure as Cₛ, Node(a, (πᵢ → Cₜᵢ)ⁱ)) := Forall(equiv( S∩a→π(kᵢ)), Cₛ, Cₜᵢ)ⁱ) - | equiv(S, Leaf bbₛ as Cₛ, Node(a, (πᵢ → Cₜᵢ)ⁱ)) := Forall(equiv( S∩a→π(kᵢ)), Cₛ, Cₜᵢ)ⁱ) - The algorithm either returns Yes for every sub-input space Sᵢ := S∩(a→π(kᵢ)) and - subtree Cₜᵢ - | equiv(Sᵢ, Cₛ, Cₜᵢ) = Yes ∀i - or we have a counter example vₛ, vₜ for which - | vₛ≃vₜ∈Sₖ ∧ cₛ(vₛ) ≠ Cₜₖ(vₜ) - then because - | vₜ∈(a→πₖ) → Cₜ(vₜ) = Cₜₖ(vₜ) , - | vₛ≃vₜ∈S ∧ Cₛ(vₛ)≠Cₜ(vₜ) - we can say that - | equiv(Sᵢ, Cₛ, Cₜᵢ) = No(vₛ, vₜ) for some minimal k∈I -4. When we have a Node on the right we define πₙ as the domain of - values not covered but the union of the constructors kᵢ - | πₙ = ¬(⋃π(kᵢ)ⁱ) - The algorithm proceeds by trimming - | equiv(S, Node(a, (kᵢ → Cₛᵢ)ⁱ, C_{sf}), Cₜ) := - | Forall(equiv( S∩(a→π(kᵢ)ⁱ), Cₛᵢ, C_{t/a→π(kᵢ)})ⁱ +++ equiv(S∩(a→π(kᵢ)), Cₛ, C_{a→πₙ})) - The statement still holds and we show this by first analyzing the - /Yes/ case: - | Forall(equiv( S∩(a→π(kᵢ)ⁱ), Cₛᵢ, C_{t/a→π(kᵢ)})ⁱ = Yes - The constructor k is either included in the set of constructors kᵢ: - | k \vert k∈(kᵢ)ⁱ ∧ Cₛ(vₛ) = Cₛᵢ(vₛ) - We also know that - | (1) Cₛᵢ(vₛ) = C_{t/a→πᵢ}(vₜ) - | (2) C_{T/a→πᵢ}(vₜ) = Cₜ(vₜ) - (1) is true by induction and (2) is a consequence of the trimming lemma. - Putting everything together: - | Cₛ(vₛ) = Cₛᵢ(vₛ) = C_{T/a→πᵢ}(vₜ) = Cₜ(vₜ) - - When the k∉(kᵢ)ⁱ [TODO] - - The auxiliary Forall function returns /No(vₛ, vₜ)/ when, for a minimum k, - | equiv(Sₖ, Cₛₖ, C_{T/a→πₖ} = No(vₛ, vₜ) - Then we can say that - | Cₛₖ(vₛ) ≠ C_{t/a→πₖ}(vₜ) - that is enough for proving that - | Cₛₖ(vₛ) ≠ (C_{t/a→πₖ}(vₜ) = Cₜ(vₜ)) diff --git a/tesi/gabriel/part4.pdf b/tesi/gabriel/part4.pdf index 3c85d5a..c744289 100644 Binary files a/tesi/gabriel/part4.pdf and b/tesi/gabriel/part4.pdf differ diff --git a/tesi/gabriel/part4.tex b/tesi/gabriel/part4.tex index 1e8c80d..05c321e 100644 --- a/tesi/gabriel/part4.tex +++ b/tesi/gabriel/part4.tex @@ -1,4 +1,4 @@ -% Created 2020-04-09 Thu 01:36 +% Created 2020-04-11 Sat 00:25 % Intended LaTeX compiler: pdflatex \documentclass[11pt]{article} \usepackage[utf8]{inputenc} @@ -29,6 +29,7 @@ \newcommand{\EquivTEX}[3]{\mathsf{equiv}(#1, #2, #3)} % \equiv is already taken \newcommand{\coversTEX}[2]{#1 \mathrel{\mathsf{covers}} #2} \newcommand{\YesTEX}{\mathsf{Yes}} +\newcommand{\DZ}{\backslash\text{\{0\}}} \newcommand{\NoTEX}[2]{\mathsf{No}(#1, #2)} \usepackage{comment} \usepackage{mathpartir} @@ -51,94 +52,262 @@ moredelim=[is][\color{red}]{/[}{]/}, xleftmargin=1em, } \lstset{aboveskip=0.4ex,belowskip=0.4ex} -\author{Francesco Mecca} -\date{} +\date{\today} +\title{} \hypersetup{ + pdfauthor={}, + pdftitle={}, pdfkeywords={}, pdfsubject={}, pdfcreator={Emacs 26.3 (Org mode 9.1.9)}, pdflang={English}} \begin{document} -\maketitle -\begin{comment} -TODO: neg is parsed incorrectly -TODO: chiedi a Gabriel se T e S vanno bene, ma prima controlla che siano coerenti -\section{{\bfseries\sffamily TODO} Scaletta [1/6]} -\label{sec:org530707f} +\section{Translation validation of the Pattern Matching Compiler} +\label{sec:org45bb046} + +\subsection{Source program} +\label{sec:orgf4d7ca7} +The algorithm takes as its input a source program and translates it +into an algebraic data structure which type we call \emph{decision\_tree}. + +\begin{verbatim} +type decision_tree = + | Unreachable + | Failure + | Leaf of source_expr + | Guard of source_blackbox * decision_tree * decision_tree + | Switch of accessor * (constructor * decision_tree) list * decision_tree +\end{verbatim} + +Unreachable, Leaf of source\_expr and Failure are the terminals of the three. +We distinguish \begin{itemize} -\item[{$\boxtimes$}] Introduction -\item[{$\boxminus$}] Background [80\%] -\begin{itemize} -\item[{$\boxtimes$}] Low level representation -\item[{$\boxtimes$}] Lambda code [0\%] -\item[{$\boxtimes$}] Pattern matching -\item[{$\boxtimes$}] Symbolic execution -\item[{$\square$}] Translation Validation -\end{itemize} -\item[{$\square$}] Translation validation of the Pattern Matching Compiler -\begin{itemize} -\item[{$\square$}] Source translation -\begin{itemize} -\item[{$\square$}] Formal Grammar -\item[{$\square$}] Compilation of source patterns -\item[{$\square$}] Rest? -\end{itemize} -\item[{$\square$}] Target translation -\begin{itemize} -\item[{$\square$}] Formal Grammar -\item[{$\square$}] Symbolic execution of the Lambda target -\end{itemize} -\item[{$\square$}] Equivalence between source and target -\end{itemize} -\item[{$\square$}] Proof of correctness -\item[{$\square$}] Practical results +\item Unreachable: statically it is known that no value can go there +\item Failure: a value matching this part results in an error +\item Leaf: a value matching this part results into the evaluation of a +source black box of code \end{itemize} -Magari prima pattern matching poi compilatore? +The algorithm doesn't support type-declaration-based analysis +to know the list of constructors at a given type. +Let's consider some trivial examples: -\end{comment} -\section{Correctness of the algorithm} -\label{sec:org4188d38} -Running a program t$_S$ or its translation $\llbracket$t$_S$$\rrbracket$ against an input v$_S$ -produces as a result \emph{r} in the following way: +\begin{verbatim} +function true -> 1 +\end{verbatim} + +is translated to \begin{center} \begin{tabular}{l} -( $\llbracket$t$_S$$\rrbracket$$_S$(v$_S$) = C$_S$(v$_S$) ) $\to$ r\\ +Switch ([(true, Leaf 1)], Failure)\\ +\end{tabular} +\end{center} +while +\begin{verbatim} +function +true -> 1 +| false -> 2 +\end{verbatim} +will be translated to +\begin{center} +\begin{tabular}{l} +Switch ([(true, Leaf 1); (false, Leaf 2)])\\ +\end{tabular} +\end{center} + +It is possible to produce Unreachable examples by using + refutation clauses (a "dot" in the right-hand-side) +\begin{verbatim} +function +true -> 1 +| false -> 2 +| _ -> . +\end{verbatim} +that gets translated into +Switch ([(true, Leaf 1); (false, Leaf 2)], Unreachable) + +We trust this annotation, which is reasonable as the type-checker +verifies that it indeed holds. + +Guard nodes of the tree are emitted whenever a guard is found. Guards +node contains a blackbox of code that is never evaluated and two +branches, one that is taken in case the guard evaluates to true and +the other one that contains the path taken when the guard evaluates to +true. + +\begin{comment} +[ ] Finisci con Switch +[ ] Spiega del fallback +[ ] rivedi compilazione per tenere in considerazione il tuo albero invece che le Lambda +\end{comment} + +The source code of a pattern matching function has the +following form: + +\begin{center} +\begin{tabular}{l} +match variable with\\ +\(\vert{}\) pattern$_1$ \(\to\) expr$_1$\\ +\(\vert{}\) pattern$_2$ when guard \(\to\) expr$_2$\\ +\(\vert{}\) pattern$_3$ as var \(\to\) expr$_3$\\ +$\vdots$\\ +\(\vert{}\) p$_n$ \(\to\) expr$_n$\\ +\end{tabular} +\end{center} + +Patterns could or could not be exhaustive. + +Pattern matching code could also be written using the more compact form: +\begin{center} +\begin{tabular}{l} +function\\ +\(\vert{}\) pattern$_1$ \(\to\) expr$_1$\\ +\(\vert{}\) pattern$_2$ when guard \(\to\) expr$_2$\\ +\(\vert{}\) pattern$_3$ as var \(\to\) expr$_3$\\ +$\vdots$\\ +\(\vert{}\) p$_n$ \(\to\) expr$_n$\\ +\end{tabular} +\end{center} + + +This BNF grammar describes formally the grammar of the source program: + +\begin{center} +\begin{tabular}{l} +start ::= "match" id "with" patterns \(\vert{}\) "function" patterns\\ +patterns ::= (pattern0\(\vert{}\)pattern1) pattern1+\\ +;; pattern0 and pattern1 are needed to distinguish the first case in which\\ +;; we can avoid writing the optional vertical line\\ +pattern0 ::= clause\\ +pattern1 ::= "\(\vert{}\)" clause\\ +clause ::= lexpr "->" rexpr\\ +lexpr ::= rule ($\varepsilon$\(\vert{}\)condition)\\ +rexpr ::= \_code ;; arbitrary code\\ +rule ::= wildcard\(\vert{}\)variable\(\vert{}\)constructor\_pattern\(\vert{}\)\{\}or\_pattern ;;\\ +;; rules\\ +wildcard ::= "\_"\\ +variable ::= identifier\\ +constructor\_pattern ::= constructor (rule\(\vert{}\)$\varepsilon$) (assignment\(\vert{}\)$\varepsilon$)\\ +constructor ::= int\(\vert{}\)float\(\vert{}\)char\(\vert{}\)string\(\vert{}\)bool \(\vert{}\)unit\(\vert{}\)record\(\vert{}\)exn\(\vert{}\)objects\(\vert{}\)ref \(\vert{}\)list\(\vert{}\)tuple\(\vert{}\)array\(\vert{}\)variant\(\vert{}\)parameterized\_variant ;; data types\\ +or\_pattern ::= rule ("\(\vert{}\)" wildcard\(\vert{}\)variable\(\vert{}\)constructor\_pattern)+\\ +condition ::= "when" bexpr\\ +assignment ::= "as" id\\ +bexpr ::= \_code ;; arbitrary code\\ +\end{tabular} +\end{center} + +A source program t$_S$ is a collection of pattern clauses pointing to +\emph{bb} terms. Running a program t$_S$ against an input value v$_S$ produces as +a result \emph{r}: +\begin{center} +\begin{tabular}{l} +t$_S$ ::= (p $\to$ bb)\(^{\text{i$\in $I}}\)\\ +p ::= \(\vert{}\) K(p$_i$)$^i$, i $\in $ I \(\vert{}\) (p\(\vert{}\)q) \(\vert{}\) n $\in $ $\mathbb{N}$\\ +r ::= guard list * (Match bb \(\vert{}\) NoMatch \(\vert{}\) Absurd)\\ t$_S$(v$_S$) $\to$ r\\ \end{tabular} \end{center} -Likewise + +TODO: argument on what it means to run a source program + +\emph{guard} and \emph{bb} expressions are treated as blackboxes of OCaml code. +A sound approach for treating these blackboxes would be to inspect the +OCaml compiler during translation to Lambda code and extract the +blackboxes compiled in their Lambda representation. +This would allow to test for equality with the respective blackbox at +the target level. +Given that this level of introspection is currently not possibile, we +decided to restrict the structure of blackboxes to the following (valid) OCaml +code: + +\begin{verbatim} +external guard : 'a -> 'b = "guard" +external observe : 'a -> 'b = "observe" +\end{verbatim} + +We assume these two external functions \emph{guard} and \emph{observe} with a valid +type that lets the user pass any number of arguments to them. +All the guards are of the form \texttt{guard }, where the + are expressed using the OCaml pattern matching language. +Similarly, all the right-hand-side expressions are of the form +\texttt{observe ...} with the same constraints on arguments. + +\begin{verbatim} +type t = K1 | K2 of t (* declaration of an algebraic and recursive datatype t *) + +let _ = function + | K1 -> observe 0 + | K2 K1 -> observe 1 + | K2 x when guard x -> observe 2 + | K2 (K2 x) as y when guard x y -> observe 3 + | K2 _ -> observe 4 +\end{verbatim} + + + +In our prototype we make use of accessors to encode stored values. +\begin{minipage}{0.2\linewidth} +\begin{verbatim} +let value = 1 :: 2 :: 3 :: [] +(* that can also be written *) +let value = [] + |> List.cons 3 + |> List.cons 2 + |> List.cons 1 +\end{verbatim} +\end{minipage} +\hfill +\begin{minipage}{0.5\linewidth} +\begin{verbatim} + + + (field 0 x) = 1 + (field 0 (field 1 x)) = 2 + (field 0 (field 1 (field 1 x)) = 3 + (field 0 (field 1 (field 1 (field 1 x)) = [] +\end{verbatim} +\end{minipage} +An \emph{accessor} \emph{a} represents the +access path to a value that can be reached by deconstructing the +scrutinee. \begin{center} \begin{tabular}{l} -( $\llbracket$t$_T$$\rrbracket$$_T$(v$_T$) = C$_T$(v$_T$) ) $\to$ r\\ -t$_T$(v$_T$) $\to$ r\\ +a ::= Here \(\vert{}\) n.a\\ \end{tabular} \end{center} -where result r ::= guard list * (Match blackbox | NoMatch | Absurd) -and guard ::= blackbox. - -Having defined equivalence between two inputs of which one is -expressed in the source language and the other in the target language - v$_S$ $\simeq$ v$_T$ (TODO define, this talks about the representation of source values in the target) - -we can define the equivalence between a couple of programs or a couple -of decision trees +The above example, in encoded form: +\begin{verbatim} +Here = 1 +1.Here = 2 +1.1.Here = 3 +1.1.1.Here = [] +\end{verbatim} +In our prototype the source matrix m$_S$ is defined as follows \begin{center} \begin{tabular}{l} -t$_S$ $\simeq$ t$_T$ := $\forall$v$_S$$\simeq$v$_T$, t$_S$(v$_S$) = t$_T$(v$_T$)\\ -C$_S$ $\simeq$ C$_T$ := $\forall$v$_S$$\simeq$v$_T$, C$_S$(v$_S$) = C$_T$(v$_T$)\\ +SMatrix m$_S$ := (a$_j$)\(^{\text{j$\in $J}}\), ((p\(_{\text{ij}}\))\(^{\text{j$\in $J}}\) $\to$ bb$_i$)\(^{\text{i$\in $I}}\)\\ \end{tabular} \end{center} -The proposed equivalence algorithm that works on a couple of -decision trees is returns either \emph{Yes} or \emph{No(v$_S$, v$_T$)} where v$_S$ and -v$_T$ are a couple of possible counter examples for which the constraint -trees produce a different result. - -\subsection{Statements} -\label{sec:orgccf94c1} -Theorem. We say that a translation of a source program to a decision tree +Source matrices are used to build source decision trees C$_S$. +A decision tree is defined as either a Leaf, a Failure terminal or +an intermediate node with different children sharing the same accessor \emph{a} +and an optional fallback. +Failure is emitted only when the patterns don't cover the whole set of +possible input values \emph{S}. The fallback is not needed when the user +doesn't use a wildcard pattern. +\%\%\% Give example of thing +\begin{center} +\begin{tabular}{l} +C$_S$ ::= Leaf bb \(\vert{}\) Switch(a, (K$_i$ $\to$ C$_i$)\(^{\text{i$\in $S}}\) , C?) \(\vert{}\) Failure \(\vert{}\) Unreachable\\ +v$_S$ ::= K(v$_i$)\(^{\text{i$\in $I}}\) \(\vert{}\) n $\in $ $\mathbb{N}$\\ +\end{tabular} +\end{center} +\begin{comment} +Are K and Here clear here? +\end{comment} +We say that a translation of a source program to a decision tree is correct when for every possible input, the source program and its respective decision tree produces the same result @@ -148,121 +317,20 @@ $\forall$v$_S$, t$_S$(v$_S$) = $\llbracket$t$_S$$\rrbracket$$_S$(v$_S$)\\ \end{tabular} \end{center} - -Likewise, for the target language: - -\begin{center} -\begin{tabular}{l} -$\forall$v$_T$, t$_T$(v$_T$) = $\llbracket$t$_T$$\rrbracket$$_T$(v$_T$)\\ -\end{tabular} -\end{center} - -Definition: in the presence of guards we can say that two results are -equivalent modulo the guards queue, written \emph{r$_1$ $\simeq$gs r$_2$}, when: -\begin{center} -\begin{tabular}{l} -(gs$_1$, r$_1$) $\simeq$gs (gs$_2$, r$_2$) $\Leftrightarrow$ (gs$_1$, r$_1$) = (gs$_2$ ++ gs, r$_2$)\\ -\end{tabular} -\end{center} - -Definition: we say that C$_T$ covers the input space \emph{S}, written -/covers(C$_T$, S) when every value v$_S$$\in $S is a valid input to the -decision tree C$_T$. (TODO: rephrase) - -Theorem: Given an input space \emph{S} and a couple of decision trees, where -the target decision tree C$_T$ covers the input space \emph{S}, we say that -the two decision trees are equivalent when: - -\begin{center} -\begin{tabular}{l} -equiv(S, C$_S$, C$_T$, gs) = Yes $\wedge$ covers(C$_T$, S) $\to$ $\forall$v$_S$$\simeq$v$_T$ $\in $ S, C$_S$(v$_S$) $\simeq$gs C$_T$(v$_T$)\\ -\end{tabular} -\end{center} - -Similarly we say that a couple of decision trees in the presence of -an input space \emph{S} are \emph{not} equivalent when: - -\begin{center} -\begin{tabular}{l} -equiv(S, C$_S$, C$_T$, gs) = No(v$_S$,v$_T$) $\wedge$ covers(C$_T$, S) $\to$ v$_S$$\simeq$v$_T$ $\in $ S $\wedge$ C$_S$(v$_S$) $\ne$gs C$_T$(v$_T$)\\ -\end{tabular} -\end{center} - -Corollary: For a full input space \emph{S}, that is the universe of the -target program we say: - -\begin{center} -\begin{tabular}{l} -equiv(S, $\llbracket$t$_S$$\rrbracket$$_S$, $\llbracket$t$_T$$\rrbracket$$_T$, $\varnothing$) = Yes $\Leftrightarrow$ t$_S$ $\simeq$ t$_T$\\ -\end{tabular} -\end{center} - - -\begin{enumerate} -\item Proof of the correctness of the translation from source programs to source decision trees -\label{sec:orgfa2a152} - -We define a source term t$_S$ as a collection of patterns pointing to blackboxes -\begin{center} -\begin{tabular}{l} -t$_S$ ::= (p $\to$ bb)\(^{\text{i$\in $I}}\)\\ -\end{tabular} -\end{center} - -A pattern is defined as either a constructor pattern, an or pattern or -a constant pattern -\begin{center} -\begin{tabular}{lllll} -p ::= & K(p$_i$)$^i$, i $\in $ I & (p & q) & n $\in $ $\mathbb{N}$\\ -\end{tabular} -\end{center} - -A decision tree is defined as either a Leaf, a Failure terminal or -an intermediate node with different children sharing the same accessor \emph{a} -and an optional fallback. -Failure is emitted only when the patterns don't cover the whole set of -possible input values \emph{S}. The fallback is not needed when the user -doesn't use a wildcard pattern. -\%\%\% Give example of thing - -\begin{center} -\begin{tabular}{ll} -C$_S$ ::= Leaf bb & Node(a, (K$_i$ $\to$ C$_i$)\(^{\text{i$\in $S}}\) , C?)\\ -a ::= Here & n.a\\ -v$_S$ ::= K(v$_i$)\(^{\text{i$\in $I}}\) & n $\in $ $\mathbb{N}$\\ -\end{tabular} -\end{center} - -\begin{comment} -Are K and Here clear here? -\end{comment} - -We define the decomposition matrix \emph{m$_S$} as -\begin{center} -\begin{tabular}{l} -SMatrix m$_S$ := (a$_j$)\(^{\text{j$\in $J}}\), ((p\(_{\text{ij}}\))\(^{\text{j$\in $J}}\) $\to$ bb$_i$)\(^{\text{i$\in $I}}\)\\ -\end{tabular} -\end{center} -\begin{comment} -Correggi prendendo in considerazione l'accessor -\end{comment} - We define the decision tree of source programs $\llbracket$t$_S$$\rrbracket$ in terms of the decision tree of pattern matrices $\llbracket$m$_S$$\rrbracket$ by the following: -$\llbracket$((p$_i$ $\to$ bb$_i$)\(^{\text{i$\in $I}}\)$\rrbracket$ := $\llbracket$(Root), (p$_i$ $\to$ bb$_i$)\(^{\text{i$\in $I}}\) $\rrbracket$ - -decision tree computed from pattern matrices respect the following invariant: +\begin{center} +\begin{tabular}{l} +$\llbracket$((p$_i$ $\to$ bb$_i$)\(^{\text{i$\in $I}}\)$\rrbracket$ := $\llbracket$(Here), (p$_i$ $\to$ bb$_i$)\(^{\text{i$\in $I}}\) $\rrbracket$\\ +\end{tabular} +\end{center} +Decision tree computed from pattern matrices respect the following invariant: \begin{center} \begin{tabular}{l} $\forall$v (v$_i$)\(^{\text{i$\in $I}}\) = v(a$_i$)\(^{\text{i$\in $I}}\) $\to$ $\llbracket$m$\rrbracket$(v) = m(v$_i$)\(^{\text{i$\in $I}}\) for m = ((a$_i$)\(^{\text{i$\in $I}}\), (r$_i$)\(^{\text{i$\in $I}}\))\\ -\end{tabular} -\end{center} -where -\begin{center} -\begin{tabular}{l} v(Here) = v\\ K(v$_i$)$^i$(k.a) = v$_k$(a) if k $\in $ [0;n[\\ \end{tabular} @@ -276,12 +344,12 @@ analysys. Base cases: \begin{enumerate} -\item\relax [| $\varnothing$, ($\varnothing$ $\to$ bb$_i$)$^i$ |] := Leaf bb$_i$ where i := min(I), that is a +\item\relax [| $\varnothing$, ($\varnothing$ $\to$ bb$_i$)$^i$ |] $\equiv$ Leaf bb$_i$ where i := min(I), that is a decision tree [|ms|] defined by an empty accessor and empty patterns pointing to blackboxes \emph{bb$_i$}. This respects the invariant -because a decomposition matrix in the case of empty rows returns -the first expression and we known that (Leaf bb)(v) := Match bb -\item\relax [| (a$_j$)$^j$, $\varnothing$ |] := Failure +because a source matrix in the case of empty rows returns +the first expression and (Leaf bb)(v) := Match bb +\item\relax [| (a$_j$)$^j$, $\varnothing$ |] $\equiv$ Failure \end{enumerate} Regarding non base cases: @@ -300,46 +368,53 @@ m := ((a_i)^i ((p_{ij})^i \to e_j)^{ij}) (k_k)^k := headconstructor(p_{i0})^i \] \begin{equation} - Groups(m) := ( k_k \to ((a)_{0.l})^{l \in Idx(k_k)} +++ (a_i)^{i \in I\backslash \{0\} }), \\ + Groups(m) := ( k_k \to ((a)_{0.l})^{l \in Idx(k_k)} +++ (a_i)^{i \in I\DZ}), \\ ( if p_{0j} is k(q_l) then \\ - (q_l)^{l \in Idx(k_k)} +++ (p_{ij})^{i \in I\backslash \{0\}} \to e_j \\ + (q_l)^{l \in Idx(k_k)} +++ (p_{ij})^{i \in I\DZ} \to e_j \\ if p_{0j} is \_ then \\ - (\_)^{l \in Idx(k_k)} +++ (p_{ij})^{i \in I\backslash \{0\}} \to e_j \\ + (\_)^{l \in Idx(k_k)} +++ (p_{ij})^{i \in I\DZ} \to e_j \\ else \bot )^j ), \\ - ((a_i)^{i \in I\backslash \{0\}}, ((p_{ij})^{i \in I\backslash \{0\}} \to e_j if p_{0j} is \_ else \bot)^{j \in J}) + ((a_i)^{i \in I\DZ}, ((p_{ij})^{i \in I\DZ} \to e_j if p_{0j} is \_ else \bot)^{j \in J}) \end{equation} -Groups(m) is an auxiliary function that decomposes a matrix m into +Groups(m) is an auxiliary function that source a matrix m into submatrices, according to the head constructor of their first pattern. Groups(m) returns one submatrix m\_r for each head constructor k that occurs on the first row of m, plus one "wildcard submatrix" m\(_{\text{wild}}\) that matches on all values that do not start with one of those head constructors. -Intuitively, m is equivalent to its decompositionin the following -sense: if the first pattern of an input vector (v\_i)\^{}i starts with one -of the head constructors k, then running (v\_i)\^{}i against m is the same -as running it against the submatrix m\_k; otherwise (its head +Intuitively, m is equivalent to its decomposition in the following +sense: if the first pattern of an input vector (v$_i$)$^i$ starts with one +of the head constructors k, then running (v$_i$)$^i$ against m is the same +as running it against the submatrix m$_k$; otherwise (its head constructor is none of the k) it is equivalent to running it against the wildcard submatrix. We formalize this intuition as follows: Lemma (Groups): Let \[m\] be a matrix with \[Groups(m) = (k_r \to m_r)^k, m_{wild}\]. -For any value vector \[(v_i)^l\] such that \[v_0 = k(v'_l)^l\] for some +For any value vector \((v_i)^l\) such that \(v_0 = k(v'_l)^l\) for some constructor k, we have: -\[ - if k = k_k for some k then - m(v_i)^i = m_k((v'_l)^l +++ (v_i)^{i\in I\backslash \{0\}}) - else - m(v_i)^i = m_{wild}(v_i)^{i\in I\backslash \{0\}} -\] +\begin{center} +\begin{tabular}{l} +if k = k$_k$ \text{ for some k then}\\ +\quad m(v$_i$)$^i$ = m$_k$((v\(_{\text{l}}\)')$^l$ \sout{+} (v\(_{\text{i}}\))\(^{\text{i$\in $I}\DZ}\))\\ +\text{else}\\ +\quad m(v$_i$)$^i$ = m\(_{\text{wild}}\)(v$_i$)\(^{\text{i$\in $I}\DZ}\)\\ +\end{tabular} +\end{center} +\begin{comment} +TODO: fix \{0} +\end{comment} + +\begin{enumerate} \item Proof: -\label{sec:org8dad6fc} -Let \[m\] be a matrix with \[Group(m) = (k_r \to m_r)^k, m_{wild}\]. -Let \[(v_i)^i\] be an input matrix with \[v_0 = k(v'_l)^l\] for some k. +\label{sec:org9868f7d} +Let \(m\) be a matrix with \[Group(m) = (k_r \to m_r)^k, m_{wild}\] +Let \((v_i)^i\) be an input matrix with \(v_0 = k(v'_l)^l\) for some k. We proceed by case analysis: \begin{itemize} \item either k is one of the k$_k$ for some k @@ -351,26 +426,31 @@ a family over each row r$_j$ of a matrix We know, from the definition of Groups(m), that m$_k$ is -\[ - ((a){0.l})^{l\in Idx(k_k)} +++ (a_i)^{i\in I\backslash \{0\}}), - ( - if p_{0j} is k(q_l) then - (q_l)^l +++ (p_{ij})^{i\in I\backslash \{0\}} \to e_j - if p_{0j} is _ then - (_)^l +++ (p_{ij})^{i\in I\backslash \{0\}} \to e_j - else \bot - )^j -\] +\begin{center} +\begin{tabular}{l} +((a)\{0.l\})\(^{\text{l$\in $Idx(k$_k$)}}\) \sout{+} (a$_i$)\(^{\text{i$\in $I}\DZ}\)),\\ +(\\ +\quad if p\(_{\text{0j}}\) is k(q$_l$) then\\ +\quad \quad (q$_l$)$^l$ \sout{+} (p\(_{\text{ij}}\))\(^{\text{i$\in $I}\DZ\ }\) $\to$ e$_j$\\ +\quad if p\(_{\text{0j}}\) is \_ then\\ +\quad \quad (\_)$^l$ \sout{+} (p\(_{\text{ij}}\))\(^{\text{i$\in $I}\DZ}\) $\to$ e$_j$\\ +\quad else $\bot$\\ +)\(^{\text{j$\in $J}}\)\\ +\end{tabular} +\end{center} By definition, m(v$_i$)$^i$ is -m(v$_i$)$^i$ = First(r$_j$(v$_i$)$^i$)$^j$ for m = ((a$_i$)$^i$, (r$_j$)$^j$) -(p$_i$)$^i$ (v$_i$)$^i$ = \{ - if p$_0$ = k(q$_l$)$^l$, v$_0$ = k'(v'$_k$)$^k$, k=Idx(k') and l=Idx(k) - if k $\ne$ k' then $\bot$ - if k = k' then ((q$_l$)$^l$ \sout{+} (p$_i$)\(^{\text{i$\in $I}\backslash\ \text{$\backslash${0$\backslash$}}}\)) ((v'$_k$)$^k$ \sout{+} (v$_i$)\(^{\text{i$\in $I}\backslash\ \text{$\backslash${0$\backslash$}}}\)) - if p$_0$ = (q$_1$|q$_2$) then - First( (q$_1$p$_i$\(^{\text{i$\in $I }\backslash\ \text{$\backslash${0$\backslash$}}}\)) v$_i$\(^{\text{i$\in $I }\backslash\ \text{$\backslash${0$\backslash$}}}\), (q$_2$p$_i$\(^{\text{i$\in $I }\backslash\ \text{$\backslash${0$\backslash$}}}\)) v$_i$\(^{\text{i$\in $I }\backslash\ \text{$\backslash${0$\backslash$}}}\) ) -\} +\begin{center} +\begin{tabular}{l} +m(v$_i$)$^i$ = First(r$_j$(v$_i$)$^i$)$^j$ for m = ((a$_i$)$^i$, (r$_j$)$^j$)\\ +(p$_i$)$^i$ (v$_i$)$^i$ = \{\\ +\quad if p$_0$ = k(q$_l$)$^l$, v$_0$ = k'(v'$_k$)$^k$, k=Idx(k') and l=Idx(k)\\ +\quad \quad if k $\ne$ k' then $\bot$\\ +if k = k' then ((q$_l$)$^l$ \sout{+} (p$_i$)\(^{\text{i$\in $I}\DZ}\)) ((v'$_k$)$^k$ \sout{+} (v$_i$)\(^{\text{i$\in $I}\DZ}\) )\\ +if p$_0$ = (q$_1$\(\vert{}\)q$_2$) then\\ +First( (q$_1$p$_i$\(^{\text{i$\in $I}\DZ}\)) v$_i$\(^{\text{i$\in $I}\DZ}\), (q$_2$p$_i$\(^{\text{i$\in $I }\DZ}\)) v$_i$\(^{\text{i$\in $I}\DZ}\))\}\\ +\end{tabular} +\end{center} For this reason, if we can prove that \begin{center} @@ -386,73 +466,318 @@ m(v$_i$)$^i$ = m$_k$((v'$_k$)$^k$ ++ (v$_i$)$^i$)\\ \end{center} from the above definition. -We can also show that a$_i$ = a\(_{\text{0.l}}\)$^l$ \sout{+} a\(_{\text{i$\in $I}\backslash\ \text{$\backslash${0$\backslash$}}}\) because v(a$_0$) = K(v(a)\{0.l\})$^l$) +We can also show that a$_i$ = (a\(_{\text{0.l}}\))$^l$ \sout{+} a\(_{\text{i$\in $I}\DZ}\) because v(a$_0$) = K(v(a)\{0.l\})$^l$) \end{enumerate} +\subsection{Target translation} +\label{sec:org5f1edb6} + +The target program of the following general form is parsed using a parser +generated by Menhir, a LR(1) parser generator for the OCaml programming language. +Menhir compiles LR(1) a grammar specification, in this case a subset +of the Lambda intermediate language, down to OCaml code. +This is the grammar of the target language (TODO: check menhir grammar) +\begin{center} +\begin{tabular}{l} +start ::= sexpr\\ +sexpr ::= variable \(\vert{}\) string \(\vert{}\) "(" special\_form ")"\\ +string ::= "$\backslash$"" identifier "$\backslash$"" ;; string between doublequotes\\ +variable ::= identifier\\ +special\_form ::= let\(\vert{}\)catch\(\vert{}\)if\(\vert{}\)switch\(\vert{}\)switch-star\(\vert{}\)field\(\vert{}\)apply\(\vert{}\)isout\\ +let ::= "let" assignment sexpr ;; (assignment sexpr)+ outside of pattern match code\\ +assignment ::= "function" variable variable+ ;; the first variable is the identifier of the function\\ +field ::= "field" digit variable\\ +apply ::= ocaml\_lambda\_code ;; arbitrary code\\ +catch ::= "catch" sexpr with sexpr\\ +with ::= "with" "(" label ")"\\ +exit ::= "exit" label\\ +switch-star ::= "switch*" variable case*\\ +switch::= "switch" variable case* "default:" sexpr\\ +case ::= "case" casevar ":" sexpr\\ +casevar ::= ("tag"\(\vert{}\)"int") integer\\ +if ::= "if" bexpr sexpr sexpr\\ +bexpr ::= "(" ("!="\(\vert{}\)"\texttt{="\textbackslash{}vert\{\}">}"\(\vert{}\)"<="\(\vert{}\)">"\(\vert{}\)"<") sexpr digit \(\vert{}\) field ")"\\ +label ::= integer\\ +\end{tabular} +\end{center} +The prototype doesn't support strings. + +The AST built by the parser is traversed and evaluated by the symbolic +execution engine. +Given that the target language supports jumps in the form of "catch - exit" +blocks the engine tries to evaluate the instructions inside the blocks +and stores the result of the partial evaluation into a record. +When a jump is encountered, the information at the point allows to +finalize the evaluation of the jump block. +In the environment the engine also stores bindings to values and +functions. +Integer additions and subtractions are simplified in a second pass. +The result of the symbolic evaluation is a target decision tree C$_T$ +\begin{center} +\begin{tabular}{l} +C$_T$ ::= Leaf bb \(\vert{}\) Switch(a, ($\pi$$_i$ $\to$ C$_i$)\(^{\text{i$\in $S}}\) , C?) \(\vert{}\) Failure\\ +v$_T$ ::= Cell(tag $\in $ $\mathbb{N}$, (v$_i$)\(^{\text{i$\in $I}}\)) \(\vert{}\) n $\in $ $\mathbb{N}$\\ +\end{tabular} +\end{center} +Every branch of the decision tree is "constrained" by a domain +\begin{center} +\begin{tabular}{l} +Domain $\pi$ = \{ n\(\vert{}\)n$\in $$\mathbb{N}$ x n\(\vert{}\)n$\in $Tag$\subseteq$$\mathbb{N}$ \}\\ +\end{tabular} +\end{center} +Intuitively, the $\pi$ condition at every branch tells us the set of +possible values that can "flow" through that path. +$\pi$ conditions are refined by the engine during the evaluation; at the +root of the decision tree the domain corresponds to the set of +possible values that the type of the function can hold. +C? is the fallback node of the tree that is taken whenever the value +at that point of the execution can't flow to any other subbranch. +Intuitively, the $\pi$\(_{\text{fallback}}\) condition of the C? fallback node is +\begin{center} +\begin{tabular}{l} +$\pi$\(_{\text{fallback}}\) = ¬\bigcup\limits\(_{\text{i$\in $I}}\)$\pi$$_i$\\ +\end{tabular} +\end{center} +The fallback node can be omitted in the case where the domain of the +children nodes correspond to set of possible values pointed by the +accessor at that point of the execution +\begin{center} +\begin{tabular}{l} +domain(v$_S$(a)) = \bigcup\limits\(_{\text{i$\in $I}}\)$\pi$$_i$\\ +\end{tabular} +\end{center} +We say that a translation of a target program to a decision tree +is correct when for every possible input, the target program and its +respective decision tree produces the same result +\begin{center} +\begin{tabular}{l} +$\forall$v$_T$, t$_T$(v$_T$) = $\llbracket$t$_T$$\rrbracket$$_T$(v$_T$)\\ +\end{tabular} +\end{center} + + + +\subsection{Equivalence checking} +\label{sec:org4d9383d} + +The equivalence checking algorithm takes as input a domain of +possible values \emph{S} and a pair of source and target decision trees and +in case the two trees are not equivalent it returns a counter example. +The algorithm respects the following correctness statement: + +\begin{comment} +TODO: we have to define what \coversTEX mean for readers to understand the specifications +(or we use a simplifying lie by hiding \coversTEX in the statements). +\end{comment} + +\begin{align*} + \EquivTEX S {C_S} {C_T} \emptyqueue = \YesTEX \;\land\; \coversTEX {C_T} S + & \implies + \forall v_S \approx v_T \in S,\; C_S(v_S) = C_T(v_T) +\\ + \EquivTEX S {C_S} {C_T} \emptyqueue = \NoTEX {v_S} {v_T} \;\land\; \coversTEX {C_T} S + & \implies + v_S \approx v_T \in S \;\land\; C_S(v_S) \neq C_T(v_T) +\end{align*} +Our equivalence-checking algorithm \(\EquivTEX S {C_S} {C_T} G\) is +a exactly decision procedure for the provability of the judgment +\((\EquivTEX S {C_S} {C_T} G)\), defined below. +\begin{mathpar} + \begin{array}{l@{~}r@{~}l} + & & \text{\emph{constraint trees}} \\ + C & \bnfeq & \Leaf t \\ + & \bnfor & \Failure \\ + & \bnfor & \Switch a {\Fam i {\pi_i, C_i}} \Cfb \\ + & \bnfor & \Guard t {C_0} {C_1} \\ + \end{array} + + \begin{array}{l@{~}r@{~}l} + & & \text{\emph{boolean result}} \\ + b & \in & \{ 0, 1 \} \\[0.5em] + & & \text{\emph{guard queues}} \\ + G & \bnfeq & (t_1 = b_1), \dots, (t_n = b_n) \\ + \end{array} + + \begin{array}{l@{~}r@{~}l} + & & \text{\emph{input space}} \\ + S & \subseteq & \{ (v_S, v_T) \mid \vrel {v_S} {v_T} \} \\ + \end{array} + \\ + \infer{ } + {\EquivTEX \emptyset {C_S} {C_T} G} + + \infer{ } + {\EquivTEX S \Failure \Failure \emptyqueue} + + \infer + {\trel {t_S} {t_T}} + {\EquivTEX S {\Leaf {t_S}} {\Leaf {t_T}} \emptyqueue} + + \infer + {\forall i,\; + \EquivTEX + {(S \land a = K_i)} + {C_i} {\trim {C_T} {a = K_i}} G + \\ + \EquivTEX + {(S \land a \notin \Fam i {K_i})} + \Cfb {\trim {C_T} {a \notin \Fam i {K_i}}} G + } + {\EquivTEX S + {\Switch a {\Fam i {K_i, C_i}} \Cfb} {C_T} G} + +\begin{comment} + % TODO explain somewhere why the judgment is not symmetric: + % we avoid defining trimming on source trees, which would + % require more expressive conditions than just constructors +\end{comment} + \infer + {C_S \in {\Leaf t, \Failure} + \\ + \forall i,\; \EquivTEX {(S \land a \in D_i)} {C_S} {C_i} G + \\ + \EquivTEX {(S \land a \notin \Fam i {D_i})} {C_S} \Cfb G} + {\EquivTEX S + {C_S} {\Switch a {\Fam i {D_i} {C_i}} \Cfb} G} + + \infer + {\EquivTEX S {C_0} {C_T} {G, (t_S = 0)} + \\ + \EquivTEX S {C_1} {C_T} {G, (t_S = 1)}} + {\EquivTEX S + {\Guard {t_S} {C_0} {C_1}} {C_T} G} + + \infer + {\trel {t_S} {t_T} + \\ + \EquivTEX S {C_S} {C_b} G} + {\EquivTEX S + {C_S} {\Guard {t_T} {C_0} {C_1}} {(t_S = b), G}} +\end{mathpar} +Running a program t$_S$ or its translation $\llbracket$t$_S$$\rrbracket$ against an input v$_S$ +produces as a result \emph{r} in the following way: +\begin{center} +\begin{tabular}{l} +( $\llbracket$t$_S$$\rrbracket$$_S$(v$_S$) $\equiv$ C$_S$(v$_S$) ) $\to$ r\\ +t$_S$(v$_S$) $\to$ r\\ +\end{tabular} +\end{center} +Likewise +\begin{center} +\begin{tabular}{l} +( $\llbracket$t$_T$$\rrbracket$$_T$(v$_T$) $\equiv$ C$_T$(v$_T$) ) $\to$ r\\ +t$_T$(v$_T$) $\to$ r\\ +result r ::= guard list * (Match blackbox \(\vert{}\) NoMatch \(\vert{}\) Absurd)\\ +guard ::= blackbox.\\ +\end{tabular} +\end{center} +Having defined equivalence between two inputs of which one is +expressed in the source language and the other in the target language, +v$_S$ $\simeq$ v$_T$, we can define the equivalence between a couple of programs or +a couple of decision trees +\begin{center} +\begin{tabular}{l} +t$_S$ $\simeq$ t$_T$ := $\forall$v$_S$$\simeq$v$_T$, t$_S$(v$_S$) = t$_T$(v$_T$)\\ +C$_S$ $\simeq$ C$_T$ := $\forall$v$_S$$\simeq$v$_T$, C$_S$(v$_S$) = C$_T$(v$_T$)\\ +\end{tabular} +\end{center} +The result of the proposed equivalence algorithm is \emph{Yes} or \emph{No(v$_S$, +v$_T$)}. In particular, in the negative case, v$_S$ and v$_T$ are a couple of +possible counter examples for which the decision trees produce a +different result. + +In the presence of guards we can say that two results are +equivalent modulo the guards queue, written \emph{r$_1$ $\simeq$gs r$_2$}, when: +\begin{center} +\begin{tabular}{l} +(gs$_1$, r$_1$) $\simeq$gs (gs$_2$, r$_2$) $\Leftrightarrow$ (gs$_1$, r$_1$) = (gs$_2$ ++ gs, r$_2$)\\ +\end{tabular} +\end{center} +We say that C$_T$ covers the input space \emph{S}, written +\emph{covers(C$_T$, S)} when every value v$_S$$\in $S is a valid input to the +decision tree C$_T$. (TODO: rephrase) +Given an input space \emph{S} and a couple of decision trees, where +the target decision tree C$_T$ covers the input space \emph{S}, we say that +the two decision trees are equivalent when: +\begin{center} +\begin{tabular}{l} +equiv(S, C$_S$, C$_T$, gs) = Yes $\wedge$ covers(C$_T$, S) $\to$ $\forall$v$_S$$\simeq$v$_T$ $\in $ S, C$_S$(v$_S$) $\simeq$gs C$_T$(v$_T$)\\ +\end{tabular} +\end{center} +Similarly we say that a couple of decision trees in the presence of +an input space \emph{S} are \emph{not} equivalent when: +\begin{center} +\begin{tabular}{l} +equiv(S, C$_S$, C$_T$, gs) = No(v$_S$,v$_T$) $\wedge$ covers(C$_T$, S) $\to$ v$_S$$\simeq$v$_T$ $\in $ S $\wedge$ C$_S$(v$_S$) $\ne$gs C$_T$(v$_T$)\\ +\end{tabular} +\end{center} +Corollary: For a full input space \emph{S}, that is the universe of the +target program we say: +\begin{center} +\begin{tabular}{l} +equiv(S, $\llbracket$t$_S$$\rrbracket$$_S$, $\llbracket$t$_T$$\rrbracket$$_T$, $\varnothing$) = Yes $\Leftrightarrow$ t$_S$ $\simeq$ t$_T$\\ +\end{tabular} +\end{center} + -\subsection{Proof of equivalence checking} -\label{sec:orgffce3ea} \begin{comment} TODO: put ^i$\in $I where needed \end{comment} \subsubsection{The trimming lemma} The trimming lemma allows to reduce the size of a decision tree given -an accessor $\to$ $\pi$ relation (TODO: expand) +an accessor \emph{a} $\to$ $\pi$ relation (TODO: expand) \begin{center} \begin{tabular}{l} -$\forall$v$_T$ $\in $ (a$\to$$\pi$), C$_T$(v$_T$) = C\(_{\text{t/a$\to$$\pi$(k$_i$)}}\)(v$_T$)\\ +$\forall$v$_T$ $\in $ (a$\to$$\pi$), C$_T$(v$_T$) = C\(_{\text{t/a$\to$$\pi$}}\)(v$_T$)\\ \end{tabular} \end{center} We prove this by induction on C$_T$: - a. C$_T$ = Leaf\(_{\text{bb}}\): when the decision tree is a leaf terminal, we - know that + +\begin{itemize} +\item C$_T$ = Leaf\(_{\text{bb}}\): when the decision tree is a leaf terminal, the result of trimming on a Leaf is the Leaf itself \begin{center} \begin{tabular}{l} Leaf\(_{\text{bb/a$\to$$\pi$}}\)(v) = Leaf\(_{\text{bb}}\)(v)\\ \end{tabular} \end{center} -That means that the result of trimming on a Leaf is the Leaf itself -b. The same applies to Failure terminal +\item The same applies to Failure terminal \begin{center} \begin{tabular}{l} Failure\(_{\text{/a$\to$$\pi$}}\)(v) = Failure(v)\\ \end{tabular} \end{center} -c. When C$_T$ = Node(b, ($\pi$$\to$C$_i$)$^i$)\(_{\text{/a$\to$$\pi$}}\) then -we look at the accessor \emph{a} of the subtree C$_i$ and -we define $\pi$$_i$' = $\pi$$_i$ if a$\ne$b else $\pi$$_i$$\cap$$\pi$ -Trimming a switch node yields the following result: +\item When C$_T$ = Switch(b, ($\pi$$\to$C$_i$)$^i$)\(_{\text{/a$\to$$\pi$}}\) then we look at the accessor +\emph{a} of the subtree C$_i$ and we define $\pi$$_i$' = $\pi$$_i$ if a$\ne$b else $\pi$$_i$$\cap$$\pi$ Trimming +a switch node yields the following result: \begin{center} \begin{tabular}{l} -Node(b, ($\pi$$\to$C$_i$)$^i$)\(_{\text{/a$\to$$\pi$}}\) := Node(b, ($\pi$'$_i$$\to$C\(_{\text{i/a$\to$$\pi$}}\))$^i$)\\ +Switch(b, ($\pi$$\to$C$_i$)\(^{\text{i$\in $I}}\))\(_{\text{/a$\to$$\pi$}}\) := Switch(b, ($\pi$'$_i$$\to$C\(_{\text{i/a$\to$$\pi$}}\))\(^{\text{i$\in $I}}\))\\ \end{tabular} \end{center} - +\end{itemize} \begin{comment} -Actually in the proof.org file I transcribed: - e. Unreachabe $\to$ $\bot$ -This is not correct because you don't have Unreachable nodes in target decision trees +TODO: understand how to properly align lists +check that every list is aligned \end{comment} - For the trimming lemma we have to prove that running the value v$_T$ against -the decistion tree C$_T$ is the same as running v$_T$ against the tree +the decision tree C$_T$ is the same as running v$_T$ against the tree C\(_{\text{trim}}\) that is the result of the trimming operation on C$_T$ \begin{center} \begin{tabular}{l} -C$_T$(v$_T$) = C\(_{\text{trim}}\)(v$_T$) = Node(b, ($\pi$$_i$'$\to$C\(_{\text{i/a$\to$$\pi$}}\))$^i$)(v$_T$)\\ +C$_T$(v$_T$) = C\(_{\text{trim}}\)(v$_T$) = Switch(b, ($\pi$$_i$'$\to$C\(_{\text{i/a$\to$$\pi$}}\))\(^{\text{i$\in $I}}\))(v$_T$)\\ \end{tabular} \end{center} We can reason by first noting that when v$_T$$\notin$(b$\to$$\pi$$_i$)$^i$ the node must be a Failure node. -In the case where $\exists$k| v$_T$$\in $(b$\to$$\pi$$_k$) then we can prove that +In the case where $\exists$k \(\vert{}\) v$_T$$\in $(b$\to$$\pi$$_k$) then we can prove that \begin{center} \begin{tabular}{l} -C\(_{\text{k/a$\to$$\pi$}}\)(v$_T$) = Node(b, ($\pi$$_i$'$\to$C\(_{\text{i/a$\to$$\pi$}}\))$^i$)(v$_T$)\\ +C\(_{\text{k/a$\to$$\pi$}}\)(v$_T$) = Switch(b, ($\pi$$_i$'$\to$C\(_{\text{i/a$\to$$\pi$}}\))\(^{\text{i$\in $I}}\))(v$_T$)\\ \end{tabular} \end{center} because when a $\ne$ b then $\pi$$_k$'= $\pi$$_k$ and this means that v$_T$$\in $$\pi$$_k$' -while when a = b then $\pi$$_k$'=($\pi$$_k$$\cap$$\pi$) and vt$\in $$\pi$$_k$' because: +while when a = b then $\pi$$_k$'=($\pi$$_k$$\cap$$\pi$) and v$_T$$\in $$\pi$$_k$' because: \begin{itemize} \item by the hypothesis, v$_T$$\in $$\pi$ \item we are in the case where v$_T$$\in $$\pi$$_k$ @@ -481,8 +806,8 @@ Covering lemma: The equivalence checking algorithm takes as parameters an input space \emph{S}, a source decision tree \emph{C$_S$} and a target decision tree \emph{C$_T$}: \begin{center} -\begin{tabular}{ll} -equiv(S, C$_S$, C$_T$) $\to$ Yes & No(v$_S$, v$_T$)\\ +\begin{tabular}{l} +equiv(S, C$_S$, C$_T$) $\to$ Yes \(\vert{}\) No(v$_S$, v$_T$)\\ \end{tabular} \end{center} @@ -506,7 +831,6 @@ outputs a different result. equiv(S, C$_S$, C$_T$) = No(v$_S$,v$_T$) and cover(C$_T$, S) $\to$ $\forall$ v$_S$ $\simeq$ v$_T$$\in $S $\wedge$ C$_S$(v$_S$) $\ne$ C$_T$(v$_T$)\\ \end{tabular} \end{center} - We define the following \begin{center} \begin{tabular}{l} @@ -563,8 +887,8 @@ Given that $\forall$v, Failure(v) = Failure, the statement holds. \item When we have a Leaf or a Failure at the left side: \begin{center} \begin{tabular}{l} -equiv(S, Failure as C$_S$, Node(a, ($\pi$$_i$ $\to$ C$_T$$_i$)$^i$)) := Forall(equiv( S$\cap$a$\to$$\pi$(k$_i$)), C$_S$, C$_T$$_i$)$^i$)\\ -equiv(S, Leaf bb$_S$ as C$_S$, Node(a, ($\pi$$_i$ $\to$ C$_T$$_i$)$^i$)) := Forall(equiv( S$\cap$a$\to$$\pi$(k$_i$)), C$_S$, C$_T$$_i$)$^i$)\\ +equiv(S, Failure as C$_S$, Switch(a, ($\pi$$_i$ $\to$ C$_T$$_i$)\(^{\text{i$\in $I}}\))) := Forall(equiv( S$\cap$a$\to$$\pi$(k$_i$)), C$_S$, C$_T$$_i$)\(^{\text{i$\in $I}}\))\\ +equiv(S, Leaf bb$_S$ as C$_S$, Switch(a, ($\pi$$_i$ $\to$ C$_T$$_i$)\(^{\text{i$\in $I}}\))) := Forall(equiv( S$\cap$a$\to$$\pi$(k$_i$)), C$_S$, C$_T$$_i$)\(^{\text{i$\in $I}}\))\\ \end{tabular} \end{center} The algorithm either returns Yes for every sub-input space S$_i$ := S$\cap$(a$\to$$\pi$(k$_i$)) and @@ -593,25 +917,25 @@ we can say that equiv(S$_i$, C$_S$, C$_T$$_i$) = No(v$_S$, v$_T$) for some minimal k$\in $I\\ \end{tabular} \end{center} -\item When we have a Node on the right we define $\pi$$_n$ as the domain of +\item When we have a Switch on the right we define $\pi$\(_{\text{fallback}}\) as the domain of values not covered but the union of the constructors k$_i$ \begin{center} \begin{tabular}{l} -$\pi$$_n$ = ¬($\bigcup$$\pi$(k$_i$)$^i$)\\ +$\pi$\(_{\text{fallback}}\) = ¬\bigcup\limits\(_{\text{i$\in $I}}\)$\pi$(k$_i$)\\ \end{tabular} \end{center} The algorithm proceeds by trimming \begin{center} \begin{tabular}{l} -equiv(S, Node(a, (k$_i$ $\to$ C$_S$$_i$)$^i$, C\(_{\text{sf}}\)), C$_T$) :=\\ -Forall(equiv( S$\cap$(a$\to$$\pi$(k$_i$)$^i$), C$_S$$_i$, C\(_{\text{t/a$\to$$\pi$(k$_i$)}}\))$^i$ \sout{+} equiv(S$\cap$(a$\to$$\pi$(k$_i$)), C$_S$, C\(_{\text{a$\to$$\pi$$_n$}}\)))\\ +equiv(S, Switch(a, (k$_i$ $\to$ C$_S$$_i$)\(^{\text{i$\in $I}}\), C\(_{\text{sf}}\)), C$_T$) :=\\ +Forall(equiv( S$\cap$(a$\to$$\pi$(k$_i$)\(^{\text{i$\in $I}}\)), C$_S$$_i$, C\(_{\text{t/a$\to$$\pi$(k$_i$)}}\))\(^{\text{i$\in $I}}\) \sout{+} equiv(S$\cap$(a$\to$$\pi$$_n$), C$_S$, C\(_{\text{a$\to$$\pi$}_{\text{fallback}}}\)))\\ \end{tabular} \end{center} The statement still holds and we show this by first analyzing the \emph{Yes} case: \begin{center} \begin{tabular}{l} -Forall(equiv( S$\cap$(a$\to$$\pi$(k$_i$)$^i$), C$_S$$_i$, C\(_{\text{t/a$\to$$\pi$(k$_i$)}}\))$^i$ = Yes\\ +Forall(equiv( S$\cap$(a$\to$$\pi$(k$_i$)\(^{\text{i$\in $I}}\)), C$_S$$_i$, C\(_{\text{t/a$\to$$\pi$(k$_i$)}}\))\(^{\text{i$\in $I}}\) = Yes\\ \end{tabular} \end{center} The constructor k is either included in the set of constructors k$_i$: @@ -657,3 +981,4 @@ C$_S$$_k$(v$_S$) $\ne$ (C\(_{\text{t/a$\to$$\pi$$_k$}}\)(v$_T$) = C$_T$(v$_T$))\ \end{center} \end{enumerate} \end{document} + \ No newline at end of file diff --git a/tesi/gabriel/part4_unicode.org b/tesi/gabriel/part4_unicode.org new file mode 100644 index 0000000..5083f82 --- /dev/null +++ b/tesi/gabriel/part4_unicode.org @@ -0,0 +1,701 @@ +#+LANGUAGE: en +#+LaTeX_CLASS: article +#+LaTeX_HEADER: \linespread{1.25} +#+LaTeX_HEADER: \usepackage{algorithm} +#+LaTeX_HEADER: \usepackage{comment} +#+LaTeX_HEADER: \usepackage{algpseudocode} +#+LaTeX_HEADER: \usepackage{amsmath,amssymb,amsthm} +#+LaTeX_HEADER: \newtheorem{definition}{Definition} +#+LaTeX_HEADER: \usepackage{mathpartir} +#+LaTeX_HEADER: \usepackage{graphicx} +#+LaTeX_HEADER: \usepackage{listings} +#+LaTeX_HEADER: \usepackage{color} +#+LaTeX_HEADER: \usepackage{stmaryrd} +#+LaTeX_HEADER: \newcommand{\semTEX}[1]{{\llbracket{#1}\rrbracket}} +#+LaTeX_HEADER: \newcommand{\EquivTEX}[3]{\mathsf{equiv}(#1, #2, #3)} % \equiv is already taken +#+LaTeX_HEADER: \newcommand{\coversTEX}[2]{#1 \mathrel{\mathsf{covers}} #2} +#+LaTeX_HEADER: \newcommand{\YesTEX}{\mathsf{Yes}} +#+LaTeX_HEADER: \newcommand{\DZ}{\backslash\text{\{0\}}} +#+LaTeX_HEADER: \newcommand{\NoTEX}[2]{\mathsf{No}(#1, #2)} +#+LaTeX_HEADER: \usepackage{comment} +#+LaTeX_HEADER: \usepackage{mathpartir} +#+LaTeX_HEADER: \usepackage{stmaryrd} % llbracket, rrbracket +#+LaTeX_HEADER: \usepackage{listings} +#+LaTeX_HEADER: \usepackage{notations} +#+LaTeX_HEADER: \lstset{ +#+LaTeX_HEADER: mathescape=true, +#+LaTeX_HEADER: language=[Objective]{Caml}, +#+LaTeX_HEADER: basicstyle=\ttfamily, +#+LaTeX_HEADER: extendedchars=true, +#+LaTeX_HEADER: showstringspaces=false, +#+LaTeX_HEADER: aboveskip=\smallskipamount, +#+LaTeX_HEADER: % belowskip=\smallskipamount, +#+LaTeX_HEADER: columns=fullflexible, +#+LaTeX_HEADER: moredelim=**[is][\color{blue}]{/*}{*/}, +#+LaTeX_HEADER: moredelim=**[is][\color{green!60!black}]{/!}{!/}, +#+LaTeX_HEADER: moredelim=**[is][\color{orange}]{/(}{)/}, +#+LaTeX_HEADER: moredelim=[is][\color{red}]{/[}{]/}, +#+LaTeX_HEADER: xleftmargin=1em, +#+LaTeX_HEADER: } +#+LaTeX_HEADER: \lstset{aboveskip=0.4ex,belowskip=0.4ex} + +#+EXPORT_SELECT_TAGS: export +#+EXPORT_EXCLUDE_TAGS: noexport +#+OPTIONS: H:2 toc:nil \n:nil @:t ::t |:t ^:{} _:{} *:t TeX:t LaTeX:t +#+STARTUP: showall +* Translation validation of the Pattern Matching Compiler + +** Source program +The algorithm takes as its input a source program and translates it +into an algebraic data structure which type we call /decision_tree/. + +#+BEGIN_SRC +type decision_tree = + | Unreachable + | Failure + | Leaf of source_expr + | Guard of source_blackbox * decision_tree * decision_tree + | Switch of accessor * (constructor * decision_tree) list * decision_tree +#+END_SRC + +Unreachable, Leaf of source_expr and Failure are the terminals of the three. +We distinguish +- Unreachable: statically it is known that no value can go there +- Failure: a value matching this part results in an error +- Leaf: a value matching this part results into the evaluation of a + source black box of code + +The algorithm doesn't support type-declaration-based analysis +to know the list of constructors at a given type. +Let's consider some trivial examples: + +#+BEGIN_SRC +function true -> 1 +#+END_SRC + +is translated to +|Switch ([(true, Leaf 1)], Failure) +while +#+BEGIN_SRC +function +true -> 1 +| false -> 2 +#+END_SRC +will be translated to +|Switch ([(true, Leaf 1); (false, Leaf 2)]) + +It is possible to produce Unreachable examples by using + refutation clauses (a "dot" in the right-hand-side) +#+BEGIN_SRC +function +true -> 1 +| false -> 2 +| _ -> . +#+END_SRC +that gets translated into +Switch ([(true, Leaf 1); (false, Leaf 2)], Unreachable) + +We trust this annotation, which is reasonable as the type-checker +verifies that it indeed holds. + +Guard nodes of the tree are emitted whenever a guard is found. Guards +node contains a blackbox of code that is never evaluated and two +branches, one that is taken in case the guard evaluates to true and +the other one that contains the path taken when the guard evaluates to +true. + +\begin{comment} +[ ] Finisci con Switch +[ ] Spiega del fallback +[ ] rivedi compilazione per tenere in considerazione il tuo albero invece che le Lambda +\end{comment} + +The source code of a pattern matching function has the +following form: + +|match variable with +|\vert pattern₁ \to expr₁ +|\vert pattern₂ when guard \to expr₂ +|\vert pattern₃ as var \to expr₃ +|⋮ +|\vert pₙ \to exprₙ + +Patterns could or could not be exhaustive. + +Pattern matching code could also be written using the more compact form: +|function +|\vert pattern₁ \to expr₁ +|\vert pattern₂ when guard \to expr₂ +|\vert pattern₃ as var \to expr₃ +|⋮ +|\vert pₙ \to exprₙ + + +This BNF grammar describes formally the grammar of the source program: + +| start ::= "match" id "with" patterns \vert{} "function" patterns +| patterns ::= (pattern0\vert{}pattern1) pattern1+ +| ;; pattern0 and pattern1 are needed to distinguish the first case in which +| ;; we can avoid writing the optional vertical line +| pattern0 ::= clause +| pattern1 ::= "\vert" clause +| clause ::= lexpr "->" rexpr +| lexpr ::= rule (ε\vert{}condition) +| rexpr ::= _code ;; arbitrary code +| rule ::= wildcard\vert{}variable\vert{}constructor_pattern\vert{}or_pattern ;; +| ;; rules +| wildcard ::= "_" +| variable ::= identifier +| constructor_pattern ::= constructor (rule\vert{}ε) (assignment\vert{}ε) +| constructor ::= int\vert{}float\vert{}char\vert{}string\vert{}bool \vert{}unit\vert{}record\vert{}exn\vert{}objects\vert{}ref \vert{}list\vert{}tuple\vert{}array\vert{}variant\vert{}parameterized_variant ;; data types +| or_pattern ::= rule ("\vert{}" wildcard\vert{}variable\vert{}constructor_pattern)+ +| condition ::= "when" bexpr +| assignment ::= "as" id +| bexpr ::= _code ;; arbitrary code + +A source program tₛ is a collection of pattern clauses pointing to +/bb/ terms. Running a program tₛ against an input value vₛ produces as +a result /r/: +| tₛ ::= (p → bb)^{i∈I} +| p ::= \vert K(pᵢ)ⁱ, i ∈ I \vert (p\vert{}q) \vert n ∈ ℕ +| r ::= guard list * (Match bb \vert{} NoMatch \vert{} Absurd) +| tₛ(vₛ) → r + +TODO: argument on what it means to run a source program + +/guard/ and /bb/ expressions are treated as blackboxes of OCaml code. +A sound approach for treating these blackboxes would be to inspect the +OCaml compiler during translation to Lambda code and extract the +blackboxes compiled in their Lambda representation. +This would allow to test for equality with the respective blackbox at +the target level. +Given that this level of introspection is currently not possibile, we +decided to restrict the structure of blackboxes to the following (valid) OCaml +code: + +#+BEGIN_SRC +external guard : 'a -> 'b = "guard" +external observe : 'a -> 'b = "observe" +#+END_SRC + +We assume these two external functions /guard/ and /observe/ with a valid +type that lets the user pass any number of arguments to them. +All the guards are of the form \texttt{guard }, where the + are expressed using the OCaml pattern matching language. +Similarly, all the right-hand-side expressions are of the form +\texttt{observe ...} with the same constraints on arguments. + +#+BEGIN_SRC +type t = K1 | K2 of t (* declaration of an algebraic and recursive datatype t *) + +let _ = function + | K1 -> observe 0 + | K2 K1 -> observe 1 + | K2 x when guard x -> observe 2 + | K2 (K2 x) as y when guard x y -> observe 3 + | K2 _ -> observe 4 +#+END_SRC + + + +In our prototype we make use of accessors to encode stored values. +\begin{minipage}{0.2\linewidth} +\begin{verbatim} +let value = 1 :: 2 :: 3 :: [] +(* that can also be written *) +let value = [] + |> List.cons 3 + |> List.cons 2 + |> List.cons 1 +\end{verbatim} +\end{minipage} +\hfill +\begin{minipage}{0.5\linewidth} +\begin{verbatim} + + + (field 0 x) = 1 + (field 0 (field 1 x)) = 2 + (field 0 (field 1 (field 1 x)) = 3 + (field 0 (field 1 (field 1 (field 1 x)) = [] +\end{verbatim} +\end{minipage} +An \emph{accessor} /a/ represents the +access path to a value that can be reached by deconstructing the +scrutinee. +| a ::= Here \vert n.a +The above example, in encoded form: +\begin{verbatim} +Here = 1 +1.Here = 2 +1.1.Here = 3 +1.1.1.Here = [] +\end{verbatim} +In our prototype the source matrix mₛ is defined as follows +| SMatrix mₛ := (aⱼ)^{j∈J}, ((p_{ij})^{j∈J} → bbᵢ)^{i∈I} + +Source matrices are used to build source decision trees Cₛ. +A decision tree is defined as either a Leaf, a Failure terminal or +an intermediate node with different children sharing the same accessor /a/ +and an optional fallback. +Failure is emitted only when the patterns don't cover the whole set of +possible input values /S/. The fallback is not needed when the user +doesn't use a wildcard pattern. +%%% Give example of thing +| Cₛ ::= Leaf bb \vert Switch(a, (Kᵢ → Cᵢ)^{i∈S} , C?) \vert Failure \vert Unreachable +| vₛ ::= K(vᵢ)^{i∈I} \vert n ∈ ℕ +\begin{comment} +Are K and Here clear here? +\end{comment} +We say that a translation of a source program to a decision tree +is correct when for every possible input, the source program and its +respective decision tree produces the same result + +| ∀vₛ, tₛ(vₛ) = 〚tₛ〛ₛ(vₛ) + +We define the decision tree of source programs +〚tₛ〛 +in terms of the decision tree of pattern matrices +〚mₛ〛 +by the following: +| 〚((pᵢ → bbᵢ)^{i∈I}〛 := 〚(Here), (pᵢ → bbᵢ)^{i∈I} 〛 +Decision tree computed from pattern matrices respect the following invariant: +| ∀v (vᵢ)^{i∈I} = v(aᵢ)^{i∈I} → 〚m〛(v) = m(vᵢ)^{i∈I} for m = ((aᵢ)^{i∈I}, (rᵢ)^{i∈I}) +| v(Here) = v +| K(vᵢ)ⁱ(k.a) = vₖ(a) if k ∈ [0;n[ +\begin{comment} +TODO: EXPLAIN +\end{comment} + +We proceed to show the correctness of the invariant by a case +analysys. + +Base cases: +1. [| ∅, (∅ → bbᵢ)ⁱ |] ≡ Leaf bbᵢ where i := min(I), that is a + decision tree [|ms|] defined by an empty accessor and empty + patterns pointing to blackboxes /bbᵢ/. This respects the invariant + because a source matrix in the case of empty rows returns + the first expression and (Leaf bb)(v) := Match bb +2. [| (aⱼ)ʲ, ∅ |] ≡ Failure + +Regarding non base cases: +Let's first define +| let Idx(k) := [0; arity(k)[ +| let First(∅) := ⊥ +| let First((aⱼ)ʲ) := a_{min(j∈J≠∅)} +\[ +m := ((a_i)^i ((p_{ij})^i \to e_j)^{ij}) +\] +\[ + (k_k)^k := headconstructor(p_{i0})^i +\] +\begin{equation} + Groups(m) := ( k_k \to ((a)_{0.l})^{l \in Idx(k_k)} +++ (a_i)^{i \in I\DZ}), \\ + ( if p_{0j} is k(q_l) then \\ + (qₗ)^{l \in Idx(k_k)} +++ (p_{ij})^{i \in I\DZ} \to e_j \\ + if p_{0j} is \_ then \\ + (\_)^{l \in Idx(k_k)} +++ (p_{ij})^{i \in I\DZ} \to e_j \\ + else \bot )^j ), \\ + ((a_i)^{i \in I\DZ}, ((p_{ij})^{i \in I\DZ} \to eⱼ if p_{0j} is \_ else \bot)^{j \in J}) +\end{equation} + +Groups(m) is an auxiliary function that source a matrix m into +submatrices, according to the head constructor of their first pattern. +Groups(m) returns one submatrix m_r for each head constructor k that +occurs on the first row of m, plus one "wildcard submatrix" m_{wild} +that matches on all values that do not start with one of those head +constructors. + +Intuitively, m is equivalent to its decomposition in the following +sense: if the first pattern of an input vector (vᵢ)ⁱ starts with one +of the head constructors k, then running (vᵢ)ⁱ against m is the same +as running it against the submatrix mₖ; otherwise (its head +constructor is none of the k) it is equivalent to running it against +the wildcard submatrix. + +We formalize this intuition as follows: +Lemma (Groups): +Let \[m\] be a matrix with \[Groups(m) = (k_r \to m_r)^k, m_{wild}\]. +For any value vector $(v_i)^l$ such that $v_0 = k(v'_l)^l$ for some +constructor k, +we have: +| if k = kₖ \text{ for some k then} +| \quad m(vᵢ)ⁱ = mₖ((v_{l}')ˡ +++ (v_{i})^{i∈I\DZ}) +| \text{else} +| \quad m(vᵢ)ⁱ = m_{wild}(vᵢ)^{i∈I\DZ} + +\begin{comment} +TODO: fix \{0} +\end{comment} + +*** Proof: +Let $m$ be a matrix with \[Group(m) = (k_r \to m_r)^k, m_{wild}\] +Let $(v_i)^i$ be an input matrix with $v_0 = k(v'_l)^l$ for some k. +We proceed by case analysis: +- either k is one of the kₖ for some k +- or k is none of the (kₖ)ᵏ + +Both m(vᵢ)ⁱ and mₖ(vₖ)ᵏ are defined as the first matching result of +a family over each row rⱼ of a matrix + +We know, from the definition of +Groups(m), that mₖ is +| ((a){0.l})^{l∈Idx(kₖ)} +++ (aᵢ)^{i∈I\DZ}), +| ( +| \quad if p_{0j} is k(qₗ) then +| \quad \quad (qₗ)ˡ +++ (p_{ij})^{i∈I\DZ } → eⱼ +| \quad if p_{0j} is _ then +| \quad \quad (_)ˡ +++ (p_{ij})^{i∈I\DZ} → eⱼ +| \quad else ⊥ +| )^{j∈J} + +By definition, m(vᵢ)ⁱ is +| m(vᵢ)ⁱ = First(rⱼ(vᵢ)ⁱ)ʲ for m = ((aᵢ)ⁱ, (rⱼ)ʲ) +| (pᵢ)ⁱ (vᵢ)ⁱ = { +| \quad if p₀ = k(qₗ)ˡ, v₀ = k'(v'ₖ)ᵏ, k=Idx(k') and l=Idx(k) +| \quad \quad if k ≠ k' then ⊥ +| if k = k' then ((qₗ)ˡ +++ (pᵢ)^{i∈I\DZ}) ((v'ₖ)ᵏ +++ (vᵢ)^{i∈I\DZ} ) +| if p₀ = (q₁\vert{}q₂) then +| First( (q₁pᵢ^{i∈I\DZ}) vᵢ^{i∈I\DZ}, (q₂pᵢ^{i∈I \DZ}) vᵢ^{i∈I\DZ})} + +For this reason, if we can prove that +| ∀j, rⱼ(vᵢ)ⁱ = r'ⱼ((v'ₖ)ᵏ ++ (vᵢ)ⁱ) +it follows that +| m(vᵢ)ⁱ = mₖ((v'ₖ)ᵏ ++ (vᵢ)ⁱ) +from the above definition. + +We can also show that aᵢ = (a_{0.l})ˡ +++ a_{i∈I\DZ} because v(a₀) = K(v(a){0.l})ˡ) + + + +** Target translation + +The target program of the following general form is parsed using a parser +generated by Menhir, a LR(1) parser generator for the OCaml programming language. +Menhir compiles LR(1) a grammar specification, in this case a subset +of the Lambda intermediate language, down to OCaml code. +This is the grammar of the target language (TODO: check menhir grammar) +| start ::= sexpr +| sexpr ::= variable \vert{} string \vert{} "(" special_form ")" +| string ::= "\"" identifier "\"" ;; string between doublequotes +| variable ::= identifier +| special_form ::= let\vert{}catch\vert{}if\vert{}switch\vert{}switch-star\vert{}field\vert{}apply\vert{}isout +| let ::= "let" assignment sexpr ;; (assignment sexpr)+ outside of pattern match code +| assignment ::= "function" variable variable+ ;; the first variable is the identifier of the function +| field ::= "field" digit variable +| apply ::= ocaml_lambda_code ;; arbitrary code +| catch ::= "catch" sexpr with sexpr +| with ::= "with" "(" label ")" +| exit ::= "exit" label +| switch-star ::= "switch*" variable case* +| switch::= "switch" variable case* "default:" sexpr +| case ::= "case" casevar ":" sexpr +| casevar ::= ("tag"\vert{}"int") integer +| if ::= "if" bexpr sexpr sexpr +| bexpr ::= "(" ("!="\vert{}"=="\vert{}">="\vert{}"<="\vert{}">"\vert{}"<") sexpr digit \vert{} field ")" +| label ::= integer +The prototype doesn't support strings. + +The AST built by the parser is traversed and evaluated by the symbolic +execution engine. +Given that the target language supports jumps in the form of "catch - exit" +blocks the engine tries to evaluate the instructions inside the blocks +and stores the result of the partial evaluation into a record. +When a jump is encountered, the information at the point allows to +finalize the evaluation of the jump block. +In the environment the engine also stores bindings to values and +functions. +Integer additions and subtractions are simplified in a second pass. +The result of the symbolic evaluation is a target decision tree Cₜ +| Cₜ ::= Leaf bb \vert Switch(a, (πᵢ → Cᵢ)^{i∈S} , C?) \vert Failure +| vₜ ::= Cell(tag ∈ ℕ, (vᵢ)^{i∈I}) \vert n ∈ ℕ +Every branch of the decision tree is "constrained" by a domain +| Domain π = { n\vert{}n∈ℕ x n\vert{}n∈Tag⊆ℕ } +Intuitively, the π condition at every branch tells us the set of +possible values that can "flow" through that path. +π conditions are refined by the engine during the evaluation; at the +root of the decision tree the domain corresponds to the set of +possible values that the type of the function can hold. +C? is the fallback node of the tree that is taken whenever the value +at that point of the execution can't flow to any other subbranch. +Intuitively, the π_{fallback} condition of the C? fallback node is +| π_{fallback} = ¬\bigcup\limits_{i∈I}πᵢ +The fallback node can be omitted in the case where the domain of the +children nodes correspond to set of possible values pointed by the +accessor at that point of the execution +| domain(vₛ(a)) = \bigcup\limits_{i∈I}πᵢ +We say that a translation of a target program to a decision tree +is correct when for every possible input, the target program and its +respective decision tree produces the same result +| ∀vₜ, tₜ(vₜ) = 〚tₜ〛ₜ(vₜ) + + + +** Equivalence checking + +The equivalence checking algorithm takes as input a domain of +possible values \emph{S} and a pair of source and target decision trees and +in case the two trees are not equivalent it returns a counter example. +The algorithm respects the following correctness statement: + +\begin{comment} +TODO: we have to define what \coversTEX mean for readers to understand the specifications +(or we use a simplifying lie by hiding \coversTEX in the statements). +\end{comment} + +\begin{align*} + \EquivTEX S {C_S} {C_T} \emptyqueue = \YesTEX \;\land\; \coversTEX {C_T} S + & \implies + \forall v_S \approx v_T \in S,\; C_S(v_S) = C_T(v_T) +\\ + \EquivTEX S {C_S} {C_T} \emptyqueue = \NoTEX {v_S} {v_T} \;\land\; \coversTEX {C_T} S + & \implies + v_S \approx v_T \in S \;\land\; C_S(v_S) \neq C_T(v_T) +\end{align*} +Our equivalence-checking algorithm $\EquivTEX S {C_S} {C_T} G$ is +a exactly decision procedure for the provability of the judgment +$(\EquivTEX S {C_S} {C_T} G)$, defined below. +\begin{mathpar} + \begin{array}{l@{~}r@{~}l} + & & \text{\emph{constraint trees}} \\ + C & \bnfeq & \Leaf t \\ + & \bnfor & \Failure \\ + & \bnfor & \Switch a {\Fam i {\pi_i, C_i}} \Cfb \\ + & \bnfor & \Guard t {C_0} {C_1} \\ + \end{array} + + \begin{array}{l@{~}r@{~}l} + & & \text{\emph{boolean result}} \\ + b & \in & \{ 0, 1 \} \\[0.5em] + & & \text{\emph{guard queues}} \\ + G & \bnfeq & (t_1 = b_1), \dots, (t_n = b_n) \\ + \end{array} + + \begin{array}{l@{~}r@{~}l} + & & \text{\emph{input space}} \\ + S & \subseteq & \{ (v_S, v_T) \mid \vrel {v_S} {v_T} \} \\ + \end{array} + \\ + \infer{ } + {\EquivTEX \emptyset {C_S} {C_T} G} + + \infer{ } + {\EquivTEX S \Failure \Failure \emptyqueue} + + \infer + {\trel {t_S} {t_T}} + {\EquivTEX S {\Leaf {t_S}} {\Leaf {t_T}} \emptyqueue} + + \infer + {\forall i,\; + \EquivTEX + {(S \land a = K_i)} + {C_i} {\trim {C_T} {a = K_i}} G + \\ + \EquivTEX + {(S \land a \notin \Fam i {K_i})} + \Cfb {\trim {C_T} {a \notin \Fam i {K_i}}} G + } + {\EquivTEX S + {\Switch a {\Fam i {K_i, C_i}} \Cfb} {C_T} G} + +\begin{comment} + % TODO explain somewhere why the judgment is not symmetric: + % we avoid defining trimming on source trees, which would + % require more expressive conditions than just constructors +\end{comment} + \infer + {C_S \in {\Leaf t, \Failure} + \\ + \forall i,\; \EquivTEX {(S \land a \in D_i)} {C_S} {C_i} G + \\ + \EquivTEX {(S \land a \notin \Fam i {D_i})} {C_S} \Cfb G} + {\EquivTEX S + {C_S} {\Switch a {\Fam i {D_i} {C_i}} \Cfb} G} + + \infer + {\EquivTEX S {C_0} {C_T} {G, (t_S = 0)} + \\ + \EquivTEX S {C_1} {C_T} {G, (t_S = 1)}} + {\EquivTEX S + {\Guard {t_S} {C_0} {C_1}} {C_T} G} + + \infer + {\trel {t_S} {t_T} + \\ + \EquivTEX S {C_S} {C_b} G} + {\EquivTEX S + {C_S} {\Guard {t_T} {C_0} {C_1}} {(t_S = b), G}} +\end{mathpar} +Running a program tₛ or its translation 〚tₛ〛 against an input vₛ +produces as a result /r/ in the following way: +| ( 〚tₛ〛ₛ(vₛ) ≡ Cₛ(vₛ) ) → r +| tₛ(vₛ) → r +Likewise +| ( 〚tₜ〛ₜ(vₜ) ≡ Cₜ(vₜ) ) → r +| tₜ(vₜ) → r +| result r ::= guard list * (Match blackbox \vert{} NoMatch \vert{} Absurd) +| guard ::= blackbox. +Having defined equivalence between two inputs of which one is +expressed in the source language and the other in the target language, +vₛ ≃ vₜ, we can define the equivalence between a couple of programs or +a couple of decision trees +| tₛ ≃ tₜ := ∀vₛ≃vₜ, tₛ(vₛ) = tₜ(vₜ) +| Cₛ ≃ Cₜ := ∀vₛ≃vₜ, Cₛ(vₛ) = Cₜ(vₜ) +The result of the proposed equivalence algorithm is /Yes/ or /No(vₛ, +vₜ)/. In particular, in the negative case, vₛ and vₜ are a couple of +possible counter examples for which the decision trees produce a +different result. + +In the presence of guards we can say that two results are +equivalent modulo the guards queue, written /r₁ ≃gs r₂/, when: +| (gs₁, r₁) ≃gs (gs₂, r₂) ⇔ (gs₁, r₁) = (gs₂ ++ gs, r₂) +We say that Cₜ covers the input space /S/, written +/covers(Cₜ, S)/ when every value vₛ∈S is a valid input to the +decision tree Cₜ. (TODO: rephrase) +Given an input space /S/ and a couple of decision trees, where +the target decision tree Cₜ covers the input space /S/, we say that +the two decision trees are equivalent when: +| equiv(S, Cₛ, Cₜ, gs) = Yes ∧ covers(Cₜ, S) → ∀vₛ≃vₜ ∈ S, Cₛ(vₛ) ≃gs Cₜ(vₜ) +Similarly we say that a couple of decision trees in the presence of +an input space /S/ are /not/ equivalent when: +| equiv(S, Cₛ, Cₜ, gs) = No(vₛ,vₜ) ∧ covers(Cₜ, S) → vₛ≃vₜ ∈ S ∧ Cₛ(vₛ) ≠gs Cₜ(vₜ) +Corollary: For a full input space /S/, that is the universe of the +target program we say: +| equiv(S, 〚tₛ〛ₛ, 〚tₜ〛ₜ, ∅) = Yes ⇔ tₛ ≃ tₜ + + +\begin{comment} +TODO: put ^i∈I where needed +\end{comment} +\subsubsection{The trimming lemma} +The trimming lemma allows to reduce the size of a decision tree given +an accessor /a/ → π relation (TODO: expand) +| ∀vₜ ∈ (a→π), Cₜ(vₜ) = C_{t/a→π}(vₜ) +We prove this by induction on Cₜ: + +- Cₜ = Leaf_{bb}: when the decision tree is a leaf terminal, the result of trimming on a Leaf is the Leaf itself + | Leaf_{bb/a→π}(v) = Leaf_{bb}(v) +- The same applies to Failure terminal + | Failure_{/a→π}(v) = Failure(v) +- When Cₜ = Switch(b, (π→Cᵢ)ⁱ)_{/a→π} then we look at the accessor + /a/ of the subtree Cᵢ and we define πᵢ' = πᵢ if a≠b else πᵢ∩π Trimming + a switch node yields the following result: + | Switch(b, (π→Cᵢ)^{i∈I})_{/a→π} := Switch(b, (π'ᵢ→C_{i/a→π})^{i∈I}) +\begin{comment} +TODO: understand how to properly align lists +check that every list is aligned +\end{comment} +For the trimming lemma we have to prove that running the value vₜ against +the decision tree Cₜ is the same as running vₜ against the tree +C_{trim} that is the result of the trimming operation on Cₜ +| Cₜ(vₜ) = C_{trim}(vₜ) = Switch(b, (πᵢ'→C_{i/a→π})^{i∈I})(vₜ) +We can reason by first noting that when vₜ∉(b→πᵢ)ⁱ the node must be a Failure node. +In the case where ∃k \vert{} vₜ∈(b→πₖ) then we can prove that +| C_{k/a→π}(vₜ) = Switch(b, (πᵢ'→C_{i/a→π})^{i∈I})(vₜ) +because when a ≠ b then πₖ'= πₖ and this means that vₜ∈πₖ' +while when a = b then πₖ'=(πₖ∩π) and vₜ∈πₖ' because: +- by the hypothesis, vₜ∈π +- we are in the case where vₜ∈πₖ +So vₜ ∈ πₖ' and by induction +| Cₖ(vₜ) = C_{k/a→π}(vₜ) +We also know that ∀vₜ∈(b→πₖ) → Cₜ(vₜ) = Cₖ(vₜ) +By putting together the last two steps, we have proven the trimming +lemma. + +\begin{comment} +TODO: what should I say about covering??? I swap π and π' +Covering lemma: + ∀a,π covers(Cₜ,S) → covers(C_{t/a→π}, (S∩a→π)) + Uᵢπⁱ ≈ Uᵢπ'∩(a→π) ≈ (Uᵢπ')∩(a→π) %% + + +%%%%%%% Also: Should I swap π and π' ? +\end{comment} + +\subsubsection{Equivalence checking} +The equivalence checking algorithm takes as parameters an input space +/S/, a source decision tree /Cₛ/ and a target decision tree /Cₜ/: +| equiv(S, Cₛ, Cₜ) → Yes \vert{} No(vₛ, vₜ) + +When the algorithm returns Yes and the input space is covered by /Cₛ/ +we can say that the couple of decision trees are the same for +every couple of source value /vₛ/ and target value /vₜ/ that are equivalent. +\begin{comment} +Define "covered" +Is it correct to say the same? How to correctly distinguish in words ≃ and = ? +\end{comment} +| equiv(S, Cₛ, Cₜ) = Yes and cover(Cₜ, S) → ∀ vₛ ≃ vₜ∈S ∧ Cₛ(vₛ) = Cₜ(vₜ) +In the case where the algorithm returns No we have at least a couple +of counter example values vₛ and vₜ for which the two decision trees +outputs a different result. +| equiv(S, Cₛ, Cₜ) = No(vₛ,vₜ) and cover(Cₜ, S) → ∀ vₛ ≃ vₜ∈S ∧ Cₛ(vₛ) ≠ Cₜ(vₜ) +We define the following +| Forall(Yes) = Yes +| Forall(Yes::l) = Forall(l) +| Forall(No(vₛ,vₜ)::_) = No(vₛ,vₜ) +There exists and are injective: +| int(k) ∈ ℕ (arity(k) = 0) +| tag(k) ∈ ℕ (arity(k) > 0) +| π(k) = {n\vert int(k) = n} x {n\vert tag(k) = n} +where k is a constructor. + +\begin{comment} +TODO: explain: +∀v∈a→π, C_{/a→π}(v) = C(v) +\end{comment} + +We proceed by case analysis: +\begin{comment} +I start numbering from zero to leave the numbers as they were on the blackboard, were we skipped some things +I think the unreachable case should go at the end. +\end{comment} +0. in case of unreachable: +| Cₛ(vₛ) = Absurd(Unreachable) ≠ Cₜ(vₜ) ∀vₛ,vₜ +1. In the case of an empty input space + | equiv(∅, Cₛ, Cₜ) := Yes + and that is trivial to prove because there is no pair of values (vₛ, vₜ) that could be + tested against the decision trees. + In the other subcases S is always non-empty. +2. When there are /Failure/ nodes at both sides the result is /Yes/: + |equiv(S, Failure, Failure) := Yes + Given that ∀v, Failure(v) = Failure, the statement holds. +3. When we have a Leaf or a Failure at the left side: + | equiv(S, Failure as Cₛ, Switch(a, (πᵢ → Cₜᵢ)^{i∈I})) := Forall(equiv( S∩a→π(kᵢ)), Cₛ, Cₜᵢ)^{i∈I}) + | equiv(S, Leaf bbₛ as Cₛ, Switch(a, (πᵢ → Cₜᵢ)^{i∈I})) := Forall(equiv( S∩a→π(kᵢ)), Cₛ, Cₜᵢ)^{i∈I}) + The algorithm either returns Yes for every sub-input space Sᵢ := S∩(a→π(kᵢ)) and + subtree Cₜᵢ + | equiv(Sᵢ, Cₛ, Cₜᵢ) = Yes ∀i + or we have a counter example vₛ, vₜ for which + | vₛ≃vₜ∈Sₖ ∧ cₛ(vₛ) ≠ Cₜₖ(vₜ) + then because + | vₜ∈(a→πₖ) → Cₜ(vₜ) = Cₜₖ(vₜ) , + | vₛ≃vₜ∈S ∧ Cₛ(vₛ)≠Cₜ(vₜ) + we can say that + | equiv(Sᵢ, Cₛ, Cₜᵢ) = No(vₛ, vₜ) for some minimal k∈I +4. When we have a Switch on the right we define π_{fallback} as the domain of + values not covered but the union of the constructors kᵢ + | π_{fallback} = ¬\bigcup\limits_{i∈I}π(kᵢ) + The algorithm proceeds by trimming + | equiv(S, Switch(a, (kᵢ → Cₛᵢ)^{i∈I}, C_{sf}), Cₜ) := + | Forall(equiv( S∩(a→π(kᵢ)^{i∈I}), Cₛᵢ, C_{t/a→π(kᵢ)})^{i∈I} +++ equiv(S∩(a→πₙ), Cₛ, C_{a→π_{fallback}})) + The statement still holds and we show this by first analyzing the + /Yes/ case: + | Forall(equiv( S∩(a→π(kᵢ)^{i∈I}), Cₛᵢ, C_{t/a→π(kᵢ)})^{i∈I} = Yes + The constructor k is either included in the set of constructors kᵢ: + | k \vert k∈(kᵢ)ⁱ ∧ Cₛ(vₛ) = Cₛᵢ(vₛ) + We also know that + | (1) Cₛᵢ(vₛ) = C_{t/a→πᵢ}(vₜ) + | (2) C_{T/a→πᵢ}(vₜ) = Cₜ(vₜ) + (1) is true by induction and (2) is a consequence of the trimming lemma. + Putting everything together: + | Cₛ(vₛ) = Cₛᵢ(vₛ) = C_{T/a→πᵢ}(vₜ) = Cₜ(vₜ) + + When the k∉(kᵢ)ⁱ [TODO] + + The auxiliary Forall function returns /No(vₛ, vₜ)/ when, for a minimum k, + | equiv(Sₖ, Cₛₖ, C_{T/a→πₖ} = No(vₛ, vₜ) + Then we can say that + | Cₛₖ(vₛ) ≠ C_{t/a→πₖ}(vₜ) + that is enough for proving that + | Cₛₖ(vₛ) ≠ (C_{t/a→πₖ}(vₜ) = Cₜ(vₜ)) diff --git a/tesi/tesi.pdf b/tesi/tesi.pdf index 0e971de..0f108e9 100644 Binary files a/tesi/tesi.pdf and b/tesi/tesi.pdf differ diff --git a/tesi/tesi_unicode.org b/tesi/tesi_unicode.org index 5a43f89..b8a930b 100644 --- a/tesi/tesi_unicode.org +++ b/tesi/tesi_unicode.org @@ -1,7 +1,7 @@ \begin{comment} TODO: not all todos are explicit. Check every comment section TODO: chiedi a Gabriel se T e S vanno bene, ma prima controlla che siano coerenti -* TODO Scaletta [1/6] +* TODO Scaletta [1/5] - [X] Introduction - [-] Background [80%] - [X] Low level representation @@ -9,16 +9,14 @@ TODO: chiedi a Gabriel se T e S vanno bene, ma prima controlla che siano coerent - [X] Pattern matching - [X] Symbolic execution - [ ] Translation Validation - - [ ] Translation validation of the Pattern Matching Compiler - - [ ] Source translation - - [ ] Formal Grammar - - [ ] Compilation of source patterns - - [ ] Rest? + - [-] Translation validation of the Pattern Matching Compiler + - [X] Source translation + - [X] Formal Grammar + - [X] Compilation of source patterns - [ ] Target translation - [ ] Formal Grammar - [ ] Symbolic execution of the Lambda target - - [ ] Equivalence between source and target - - [ ] Proof of correctness + - [X] Equivalence between source and target - [ ] Practical results Magari prima pattern matching poi compilatore? @@ -61,6 +59,7 @@ clause matrix #+LaTeX_HEADER: \newcommand{\EquivTEX}[3]{\mathsf{equiv}(#1, #2, #3)} % \equiv is already taken #+LaTeX_HEADER: \newcommand{\coversTEX}[2]{#1 \mathrel{\mathsf{covers}} #2} #+LaTeX_HEADER: \newcommand{\YesTEX}{\mathsf{Yes}} +#+LaTeX_HEADER: \newcommand{\DZ}{\backslash\text{\{0\}}} #+LaTeX_HEADER: \newcommand{\NoTEX}[2]{\mathsf{No}(#1, #2)} #+LaTeX_HEADER: \usepackage{comment} #+LaTeX_HEADER: \usepackage{mathpartir} @@ -899,9 +898,7 @@ following form: |⋮ |\vert pₙ \to exprₙ -and can include any expression that is legal for the OCaml compiler, -such as /when/ guards and assignments. Patterns could or could not -be exhaustive. +Patterns could or could not be exhaustive. Pattern matching code could also be written using the more compact form: |function @@ -934,87 +931,18 @@ This BNF grammar describes formally the grammar of the source program: | assignment ::= "as" id | bexpr ::= _code ;; arbitrary code -\begin{comment} -Check that it is still coherent to this bnf -\end{comment} +A source program tₛ is a collection of pattern clauses pointing to +/bb/ terms. Running a program tₛ against an input value vₛ produces as +a result /r/: +| tₛ ::= (p → bb)^{i∈I} +| p ::= \vert K(pᵢ)ⁱ, i ∈ I \vert (p\vert{}q) \vert n ∈ ℕ +| r ::= guard list * (Match bb \vert{} NoMatch \vert{} Absurd) +| tₛ(vₛ) → r -Patterns are of the form -| pattern | type of pattern | -|-----------------+---------------------| -| _ | wildcard | -| x | variable | -| c(p₁,p₂,...,pₙ) | constructor pattern | -| (p₁\vert p₂) | or-pattern | +TODO: argument on what it means to run a source program -During compilation by the translators, expressions are compiled into -Lambda code and are referred as lambda code actions lᵢ. - -The entire pattern matching code is represented as a clause matrix -that associates rows of patterns (p_{i,1}, p_{i,2}, ..., p_{i,n}) to -lambda code action lⁱ -\begin{equation*} -(P → L) = -\begin{pmatrix} -p_{1,1} & p_{1,2} & \cdots & p_{1,n} & → l₁ \\ -p_{2,1} & p_{2,2} & \cdots & p_{2,n} & → l₂ \\ -\vdots & \vdots & \ddots & \vdots & → \vdots \\ -p_{m,1} & p_{m,2} & \cdots & p_{m,n} & → lₘ -\end{pmatrix} -\end{equation*} - -The pattern /p/ matches a value /v/, written as p ≼ v, when one of the -following rules apply - -|--------------------+---+--------------------+-------------------------------------------| -| _ | ≼ | v | ∀v | -| x | ≼ | v | ∀v | -| (p₁ \vert p₂) | ≼ | v | iff p₁ ≼ v or p₂ ≼ v | -| c(p₁, p₂, ..., pₐ) | ≼ | c(v₁, v₂, ..., vₐ) | iff (p₁, p₂, ..., pₐ) ≼ (v₁, v₂, ..., vₐ) | -| (p₁, p₂, ..., pₐ) | ≼ | (v₁, v₂, ..., vₐ) | iff pᵢ ≼ vᵢ ∀i ∈ [1..a] | -|--------------------+---+--------------------+-------------------------------------------| - -When a value /v/ matches pattern /p/ we say that /v/ is an /instance/ of /p/. - -Considering the pattern matrix P we say that the value vector -$\vec{v}$ = (v₁, v₂, ..., vᵢ) matches the line number i in P if and only if the following two -conditions are satisfied: -- p_{i,1}, p_{i,2}, \cdots, p_{i,n} ≼ (v₁, v₂, ..., vᵢ) -- ∀j < i p_{j,1}, p_{j,2}, \cdots, p_{j,n} ⋠ (v₁, v₂, ..., vᵢ) - -We can define the following three relations with respect to patterns: -- Patter p is less precise than pattern q, written p ≼ q, when all - instances of q are instances of p -- Pattern p and q are equivalent, written p ≡ q, when their instances - are the same -- Patterns p and q are compatible when they share a common instance - -\subsubsection{Parsing of the source program} - -The source program of the following general form is parsed using a parser -generated by Menhir, a LR(1) parser generator for the OCaml programming language. -Menhir compiles LR(1) a grammar specification, in this case the OCaml pattern matching -grammar, down to OCaml code. - -|match variable with -|\vert pattern₁ -> e₁ -|\vert pattern₂ -> e₂ -|⋮ -|\vert pₘ -> eₘ - -The result of parsing, when successful, results in a list of clauses -and a list of type declarations. -Every clause consists of three objects: a left-hand-side that is the -kind of pattern expressed, an option guard and a right-hand-side expression. -Patterns are encoded in the following way: -| pattern | type | -|-----------------+-------------| -| _ | Wildcard | -| p₁ as x | Assignment | -| c(p₁,p₂,...,pₙ) | Constructor | -| (p₁\vert p₂) | Orpat | - -Guards and right-hand-sides are treated as a blackbox of OCaml code. -A sound approach for treating these blackbox would be to inspect the +/guard/ and /bb/ expressions are treated as blackboxes of OCaml code. +A sound approach for treating these blackboxes would be to inspect the OCaml compiler during translation to Lambda code and extract the blackboxes compiled in their Lambda representation. This would allow to test for equality with the respective blackbox at @@ -1046,9 +974,74 @@ let _ = function | K2 _ -> observe 4 #+END_SRC +The source program is parsed using the ocaml-compiler-libs library. +The result of parsing, when successful, results in a list of clauses +and a list of type declarations. +Every clause consists of three objects: a left-hand-side that is the +kind of pattern expressed, an option guard and a right-hand-side expression. +Patterns are encoded in the following way: +| pattern | type | +|-----------------+-------------| +| _ | Wildcard | +| p₁ as x | Assignment | +| c(p₁,p₂,...,pₙ) | Constructor | +| (p₁\vert p₂) | Orpat | + + Once parsed, the type declarations and the list of clauses are encoded in the form of a matrix that is later evaluated using a matrix decomposition algorithm. +Patterns are of the form +| pattern | type of pattern | +|-----------------+---------------------| +| _ | wildcard | +| x | variable | +| c(p₁,p₂,...,pₙ) | constructor pattern | +| (p₁\vert p₂) | or-pattern | + +The pattern /p/ matches a value /v/, written as p ≼ v, when one of the +following rules apply + +|--------------------+---+--------------------+-------------------------------------------| +| _ | ≼ | v | ∀v | +| x | ≼ | v | ∀v | +| (p₁ \vert p₂) | ≼ | v | iff p₁ ≼ v or p₂ ≼ v | +| c(p₁, p₂, ..., pₐ) | ≼ | c(v₁, v₂, ..., vₐ) | iff (p₁, p₂, ..., pₐ) ≼ (v₁, v₂, ..., vₐ) | +| (p₁, p₂, ..., pₐ) | ≼ | (v₁, v₂, ..., vₐ) | iff pᵢ ≼ vᵢ ∀i ∈ [1..a] | +|--------------------+---+--------------------+-------------------------------------------| + +When a value /v/ matches pattern /p/ we say that /v/ is an /instance/ of /p/. + + +During compilation by the translators, expressions are compiled into +Lambda code and are referred as lambda code actions lᵢ. + +The entire pattern matching code is represented as a clause matrix +that associates rows of patterns (p_{i,1}, p_{i,2}, ..., p_{i,n}) to +lambda code action lⁱ +\begin{equation*} +(P → L) = +\begin{pmatrix} +p_{1,1} & p_{1,2} & \cdots & p_{1,n} & → l₁ \\ +p_{2,1} & p_{2,2} & \cdots & p_{2,n} & → l₂ \\ +\vdots & \vdots & \ddots & \vdots & → \vdots \\ +p_{m,1} & p_{m,2} & \cdots & p_{m,n} & → lₘ +\end{pmatrix} +\end{equation*} + +Considering the pattern matrix P we say that the value vector +$\vec{v}$ = (v₁, v₂, ..., vᵢ) matches the line number i in P if and only if the following two +conditions are satisfied: +- p_{i,1}, p_{i,2}, \cdots, p_{i,n} ≼ (v₁, v₂, ..., vᵢ) +- ∀j < i p_{j,1}, p_{j,2}, \cdots, p_{j,n} ⋠ (v₁, v₂, ..., vᵢ) + +We can define the following three relations with respect to patterns: +- Pattern p is less precise than pattern q, written p ≼ q, when all + instances of q are instances of p +- Pattern p and q are equivalent, written p ≡ q, when their instances + are the same +- Patterns p and q are compatible when they share a common instance + \subsubsection{Matrix decomposition of pattern clauses} The initial input of the decomposition algorithm C consists of a vector of variables @@ -1076,8 +1069,7 @@ C₀((), → l₂ \\ → \vdots \\ → lₘ -\end{pmatrix}) -) = l₁ +\end{pmatrix}) = l₁ \end{equation*} When $\vec{x}$ ≠ () then the compilation advances using one of the @@ -1118,25 +1110,25 @@ following four rules: constructor and we define new fresh variables y₁, y₂, ..., yₐ so that the lambda action lᵢ becomes -\begin{lstlisting}[mathescape,columns=fullflexible,basicstyle=\fontfamily{lmvtt}\selectfont,] - (let (y₁ (field 0 x₁)) - (y₂ (field 1 x₁)) - ... - (yₐ (field (a-1) x₁)) - lᵢ) -\end{lstlisting} + \begin{lstlisting}[mathescape,columns=fullflexible,basicstyle=\fontfamily{lmvtt}\selectfont,] + (let (y₁ (field 0 x₁)) + (y₂ (field 1 x₁)) + ... + (yₐ (field (a-1) x₁)) + lᵢ) + \end{lstlisting} and the result of the compilation for the set of constructors {c₁, c₂, ..., cₖ} is: -\begin{lstlisting}[mathescape,columns=fullflexible,basicstyle=\fontfamily{lmvtt}\selectfont,] - switch x₁ with - case c₁: l₁ - case c₂: l₂ - ... - case cₖ: lₖ - default: exit -\end{lstlisting} + \begin{lstlisting}[mathescape,columns=fullflexible,basicstyle=\fontfamily{lmvtt}\selectfont,] + switch x₁ with + case c₁: l₁ + case c₂: l₂ + ... + case cₖ: lₖ + default: exit + \end{lstlisting} 3) Orpat rule: there are various strategies for dealing with or-patterns. The most naive one is to split the or-patterns. @@ -1161,9 +1153,239 @@ following four rules: apply, and P₂ → L₂ containing the remaining rows. The algorithm is applied to both matrices. + +In our prototype we make use of accessors to encode stored values. +\begin{minipage}{0.2\linewidth} +\begin{verbatim} +let value = 1 :: 2 :: 3 :: [] +(* that can also be written *) +let value = [] + |> List.cons 3 + |> List.cons 2 + |> List.cons 1 +\end{verbatim} +\end{minipage} +\hfill +\begin{minipage}{0.5\linewidth} +\begin{verbatim} + + + (field 0 x) = 1 + (field 0 (field 1 x)) = 2 + (field 0 (field 1 (field 1 x)) = 3 + (field 0 (field 1 (field 1 (field 1 x)) = [] +\end{verbatim} +\end{minipage} +An \emph{accessor} /a/ represents the +access path to a value that can be reached by deconstructing the +scrutinee. +| a ::= Here \vert n.a +The above example, in encoded form: +\begin{verbatim} +Here = 1 +1.Here = 2 +1.1.Here = 3 +1.1.1.Here = [] +\end{verbatim} +In our prototype the source matrix mₛ is defined as follows +| SMatrix mₛ := (aⱼ)^{j∈J}, ((p_{ij})^{j∈J} → bbᵢ)^{i∈I} + +Source matrices are used to build source decision trees Cₛ. +A decision tree is defined as either a Leaf, a Failure terminal or +an intermediate node with different children sharing the same accessor /a/ +and an optional fallback. +Failure is emitted only when the patterns don't cover the whole set of +possible input values /S/. The fallback is not needed when the user +doesn't use a wildcard pattern. +%%% Give example of thing +| Cₛ ::= Leaf bb \vert Switch(a, (Kᵢ → Cᵢ)^{i∈S} , C?) \vert Failure \vert Unreachable +| vₛ ::= K(vᵢ)^{i∈I} \vert n ∈ ℕ +\begin{comment} +Are K and Here clear here? +\end{comment} +We say that a translation of a source program to a decision tree +is correct when for every possible input, the source program and its +respective decision tree produces the same result + +| ∀vₛ, tₛ(vₛ) = 〚tₛ〛ₛ(vₛ) + +We define the decision tree of source programs +〚tₛ〛 +in terms of the decision tree of pattern matrices +〚mₛ〛 +by the following: +| 〚((pᵢ → bbᵢ)^{i∈I}〛 := 〚(Here), (pᵢ → bbᵢ)^{i∈I} 〛 +Decision tree computed from pattern matrices respect the following invariant: +| ∀v (vᵢ)^{i∈I} = v(aᵢ)^{i∈I} → 〚m〛(v) = m(vᵢ)^{i∈I} for m = ((aᵢ)^{i∈I}, (rᵢ)^{i∈I}) +| v(Here) = v +| K(vᵢ)ⁱ(k.a) = vₖ(a) if k ∈ [0;n[ +\begin{comment} +TODO: EXPLAIN +\end{comment} + +We proceed to show the correctness of the invariant by a case +analysys. + +Base cases: +1. [| ∅, (∅ → bbᵢ)ⁱ |] ≡ Leaf bbᵢ where i := min(I), that is a + decision tree [|ms|] defined by an empty accessor and empty + patterns pointing to blackboxes /bbᵢ/. This respects the invariant + because a source matrix in the case of empty rows returns + the first expression and (Leaf bb)(v) := Match bb +2. [| (aⱼ)ʲ, ∅ |] ≡ Failure + +Regarding non base cases: +Let's first define +| let Idx(k) := [0; arity(k)[ +| let First(∅) := ⊥ +| let First((aⱼ)ʲ) := a_{min(j∈J≠∅)} +\[ +m := ((a_i)^i ((p_{ij})^i \to e_j)^{ij}) +\] +\[ + (k_k)^k := headconstructor(p_{i0})^i +\] +\begin{equation} + Groups(m) := ( k_k \to ((a)_{0.l})^{l \in Idx(k_k)} +++ (a_i)^{i \in I\DZ}), \\ + ( if p_{0j} is k(q_l) then \\ + (qₗ)^{l \in Idx(k_k)} +++ (p_{ij})^{i \in I\DZ} \to e_j \\ + if p_{0j} is \_ then \\ + (\_)^{l \in Idx(k_k)} +++ (p_{ij})^{i \in I\DZ} \to e_j \\ + else \bot )^j ), \\ + ((a_i)^{i \in I\DZ}, ((p_{ij})^{i \in I\DZ} \to eⱼ if p_{0j} is \_ else \bot)^{j \in J}) +\end{equation} + +Groups(m) is an auxiliary function that source a matrix m into +submatrices, according to the head constructor of their first pattern. +Groups(m) returns one submatrix m_r for each head constructor k that +occurs on the first row of m, plus one "wildcard submatrix" m_{wild} +that matches on all values that do not start with one of those head +constructors. + +Intuitively, m is equivalent to its decomposition in the following +sense: if the first pattern of an input vector (vᵢ)ⁱ starts with one +of the head constructors k, then running (vᵢ)ⁱ against m is the same +as running it against the submatrix mₖ; otherwise (its head +constructor is none of the k) it is equivalent to running it against +the wildcard submatrix. + +We formalize this intuition as follows: +Lemma (Groups): +Let \[m\] be a matrix with \[Groups(m) = (k_r \to m_r)^k, m_{wild}\]. +For any value vector $(v_i)^l$ such that $v_0 = k(v'_l)^l$ for some +constructor k, +we have: +| if k = kₖ \text{ for some k then} +| \quad m(vᵢ)ⁱ = mₖ((v_{l}')ˡ +++ (v_{i})^{i∈I\DZ}) +| \text{else} +| \quad m(vᵢ)ⁱ = m_{wild}(vᵢ)^{i∈I\DZ} + +\begin{comment} +TODO: fix \{0} +\end{comment} + +*** Proof: +Let $m$ be a matrix with \[Group(m) = (k_r \to m_r)^k, m_{wild}\] +Let $(v_i)^i$ be an input matrix with $v_0 = k(v'_l)^l$ for some k. +We proceed by case analysis: +- either k is one of the kₖ for some k +- or k is none of the (kₖ)ᵏ + +Both m(vᵢ)ⁱ and mₖ(vₖ)ᵏ are defined as the first matching result of +a family over each row rⱼ of a matrix + +We know, from the definition of +Groups(m), that mₖ is +| ((a){0.l})^{l∈Idx(kₖ)} +++ (aᵢ)^{i∈I\DZ}), +| ( +| \quad if p_{0j} is k(qₗ) then +| \quad \quad (qₗ)ˡ +++ (p_{ij})^{i∈I\DZ } → eⱼ +| \quad if p_{0j} is _ then +| \quad \quad (_)ˡ +++ (p_{ij})^{i∈I\DZ} → eⱼ +| \quad else ⊥ +| )^{j∈J} + +By definition, m(vᵢ)ⁱ is +| m(vᵢ)ⁱ = First(rⱼ(vᵢ)ⁱ)ʲ for m = ((aᵢ)ⁱ, (rⱼ)ʲ) +| (pᵢ)ⁱ (vᵢ)ⁱ = { +| \quad if p₀ = k(qₗ)ˡ, v₀ = k'(v'ₖ)ᵏ, k=Idx(k') and l=Idx(k) +| \quad \quad if k ≠ k' then ⊥ +| if k = k' then ((qₗ)ˡ +++ (pᵢ)^{i∈I\DZ}) ((v'ₖ)ᵏ +++ (vᵢ)^{i∈I\DZ} ) +| if p₀ = (q₁\vert{}q₂) then +| First( (q₁pᵢ^{i∈I\DZ}) vᵢ^{i∈I\DZ}, (q₂pᵢ^{i∈I \DZ}) vᵢ^{i∈I\DZ})} + +For this reason, if we can prove that +| ∀j, rⱼ(vᵢ)ⁱ = r'ⱼ((v'ₖ)ᵏ ++ (vᵢ)ⁱ) +it follows that +| m(vᵢ)ⁱ = mₖ((v'ₖ)ᵏ ++ (vᵢ)ⁱ) +from the above definition. + +We can also show that aᵢ = (a_{0.l})ˡ +++ a_{i∈I\DZ} because v(a₀) = K(v(a){0.l})ˡ) + + + ** Target translation -TODO +The target program of the following general form is parsed using a parser +generated by Menhir, a LR(1) parser generator for the OCaml programming language. +Menhir compiles LR(1) a grammar specification, in this case a subset +of the Lambda intermediate language, down to OCaml code. +This is the grammar of the target language (TODO: check menhir grammar) +| start ::= sexpr +| sexpr ::= variable \vert{} string \vert{} "(" special_form ")" +| string ::= "\"" identifier "\"" ;; string between doublequotes +| variable ::= identifier +| special_form ::= let\vert{}catch\vert{}if\vert{}switch\vert{}switch-star\vert{}field\vert{}apply\vert{}isout +| let ::= "let" assignment sexpr ;; (assignment sexpr)+ outside of pattern match code +| assignment ::= "function" variable variable+ ;; the first variable is the identifier of the function +| field ::= "field" digit variable +| apply ::= ocaml_lambda_code ;; arbitrary code +| catch ::= "catch" sexpr with sexpr +| with ::= "with" "(" label ")" +| exit ::= "exit" label +| switch-star ::= "switch*" variable case* +| switch::= "switch" variable case* "default:" sexpr +| case ::= "case" casevar ":" sexpr +| casevar ::= ("tag"\vert{}"int") integer +| if ::= "if" bexpr sexpr sexpr +| bexpr ::= "(" ("!="\vert{}"=="\vert{}">="\vert{}"<="\vert{}">"\vert{}"<") sexpr digit \vert{} field ")" +| label ::= integer +The prototype doesn't support strings. + +The AST built by the parser is traversed and evaluated by the symbolic +execution engine. +Given that the target language supports jumps in the form of "catch - exit" +blocks the engine tries to evaluate the instructions inside the blocks +and stores the result of the partial evaluation into a record. +When a jump is encountered, the information at the point allows to +finalize the evaluation of the jump block. +In the environment the engine also stores bindings to values and +functions. +Integer additions and subtractions are simplified in a second pass. +The result of the symbolic evaluation is a target decision tree Cₜ +| Cₜ ::= Leaf bb \vert Switch(a, (πᵢ → Cᵢ)^{i∈S} , C?) \vert Failure +| vₜ ::= Cell(tag ∈ ℕ, (vᵢ)^{i∈I}) \vert n ∈ ℕ +Every branch of the decision tree is "constrained" by a domain +| Domain π = { n\vert{}n∈ℕ x n\vert{}n∈Tag⊆ℕ } +Intuitively, the π condition at every branch tells us the set of +possible values that can "flow" through that path. +π conditions are refined by the engine during the evaluation; at the +root of the decision tree the domain corresponds to the set of +possible values that the type of the function can hold. +C? is the fallback node of the tree that is taken whenever the value +at that point of the execution can't flow to any other subbranch. +Intuitively, the π_{fallback} condition of the C? fallback node is +| π_{fallback} = ¬\bigcup\limits_{i∈I}πᵢ +The fallback node can be omitted in the case where the domain of the +children nodes correspond to set of possible values pointed by the +accessor at that point of the execution +| domain(vₛ(a)) = \bigcup\limits_{i∈I}πᵢ +We say that a translation of a target program to a decision tree +is correct when for every possible input, the target program and its +respective decision tree produces the same result +| ∀vₜ, tₜ(vₜ) = 〚tₜ〛ₜ(vₜ) + + ** Equivalence checking @@ -1186,7 +1408,6 @@ TODO: we have to define what \coversTEX mean for readers to understand the speci & \implies v_S \approx v_T \in S \;\land\; C_S(v_S) \neq C_T(v_T) \end{align*} - Our equivalence-checking algorithm $\EquivTEX S {C_S} {C_T} G$ is a exactly decision procedure for the provability of the judgment $(\EquivTEX S {C_S} {C_T} G)$, defined below. @@ -1262,8 +1483,6 @@ $(\EquivTEX S {C_S} {C_T} G)$, defined below. {\EquivTEX S {C_S} {\Guard {t_T} {C_0} {C_1}} {(t_S = b), G}} \end{mathpar} - -* Correctness of the algorithm Running a program tₛ or its translation 〚tₛ〛 against an input vₛ produces as a result /r/ in the following way: | ( 〚tₛ〛ₛ(vₛ) ≡ Cₛ(vₛ) ) → r @@ -1271,211 +1490,37 @@ produces as a result /r/ in the following way: Likewise | ( 〚tₜ〛ₜ(vₜ) ≡ Cₜ(vₜ) ) → r | tₜ(vₜ) → r -where result r ::= guard list * (Match blackbox | NoMatch | Absurd) -and guard ::= blackbox. - +| result r ::= guard list * (Match blackbox \vert{} NoMatch \vert{} Absurd) +| guard ::= blackbox. Having defined equivalence between two inputs of which one is -expressed in the source language and the other in the target language - vₛ ≃ vₜ (TODO define, this talks about the representation of source values in the target) - -we can define the equivalence between a couple of programs or a couple -of decision trees +expressed in the source language and the other in the target language, +vₛ ≃ vₜ, we can define the equivalence between a couple of programs or +a couple of decision trees | tₛ ≃ tₜ := ∀vₛ≃vₜ, tₛ(vₛ) = tₜ(vₜ) | Cₛ ≃ Cₜ := ∀vₛ≃vₜ, Cₛ(vₛ) = Cₜ(vₜ) - The result of the proposed equivalence algorithm is /Yes/ or /No(vₛ, vₜ)/. In particular, in the negative case, vₛ and vₜ are a couple of possible counter examples for which the decision trees produce a different result. -** Statements -Theorem. We say that a translation of a source program to a decision tree -is correct when for every possible input, the source program and its -respective decision tree produces the same result - -| ∀vₛ, tₛ(vₛ) = 〚tₛ〛ₛ(vₛ) - - -Likewise, for the target language: - -| ∀vₜ, tₜ(vₜ) = 〚tₜ〛ₜ(vₜ) - -Definition: in the presence of guards we can say that two results are +In the presence of guards we can say that two results are equivalent modulo the guards queue, written /r₁ ≃gs r₂/, when: | (gs₁, r₁) ≃gs (gs₂, r₂) ⇔ (gs₁, r₁) = (gs₂ ++ gs, r₂) - -Definition: we say that Cₜ covers the input space /S/, written -/covers(Cₜ, S) when every value vₛ∈S is a valid input to the +We say that Cₜ covers the input space /S/, written +/covers(Cₜ, S)/ when every value vₛ∈S is a valid input to the decision tree Cₜ. (TODO: rephrase) - -Theorem: Given an input space /S/ and a couple of decision trees, where +Given an input space /S/ and a couple of decision trees, where the target decision tree Cₜ covers the input space /S/, we say that the two decision trees are equivalent when: - | equiv(S, Cₛ, Cₜ, gs) = Yes ∧ covers(Cₜ, S) → ∀vₛ≃vₜ ∈ S, Cₛ(vₛ) ≃gs Cₜ(vₜ) - Similarly we say that a couple of decision trees in the presence of an input space /S/ are /not/ equivalent when: - | equiv(S, Cₛ, Cₜ, gs) = No(vₛ,vₜ) ∧ covers(Cₜ, S) → vₛ≃vₜ ∈ S ∧ Cₛ(vₛ) ≠gs Cₜ(vₜ) - Corollary: For a full input space /S/, that is the universe of the target program we say: - | equiv(S, 〚tₛ〛ₛ, 〚tₜ〛ₜ, ∅) = Yes ⇔ tₛ ≃ tₜ -*** Proof of the correctness of the translation from source programs to source decision trees - -We define the source term tₛ as a collection of patterns pointing to blackboxes -| tₛ ::= (p → bb)^{i∈I} - -A pattern is defined as either a constructor pattern, an or pattern or -a constant pattern -| p ::= \vert K(pᵢ)ⁱ, i ∈ I \vert (p\vert{}q) \vert n ∈ ℕ - -A decision tree is defined as either a Leaf, a Failure terminal or -an intermediate node with different children sharing the same accessor /a/ -and an optional fallback. -Failure is emitted only when the patterns don't cover the whole set of -possible input values /S/. The fallback is not needed when the user -doesn't use a wildcard pattern. -%%% Give example of thing - -| Cₛ ::= Leaf bb \vert Switch(a, (Kᵢ → Cᵢ)^{i∈S} , C?) -| a ::= Here \vert n.a -| vₛ ::= K(vᵢ)^{i∈I} \vert n ∈ ℕ - -\begin{comment} -Are K and Here clear here? -\end{comment} - -We define the decomposition matrix /mₛ/ as -| SMatrix mₛ := (aⱼ)^{j∈J}, ((p_{ij})^{j∈J} → bbᵢ)^{i∈I} -\begin{comment} -Correggi prendendo in considerazione l'accessor -\end{comment} - -We define the decision tree of source programs -〚tₛ〛 -in terms of the decision tree of pattern matrices -〚mₛ〛 -by the following: -〚((pᵢ → bbᵢ)^{i∈I}〛 := 〚(Root), (pᵢ → bbᵢ)^{i∈I} 〛 - -decision tree computed from pattern matrices respect the following invariant: -| ∀v (vᵢ)^{i∈I} = v(aᵢ)^{i∈I} → 〚m〛(v) = m(vᵢ)^{i∈I} for m = ((aᵢ)^{i∈I}, (rᵢ)^{i∈I}) -where -| v(Here) = v -| K(vᵢ)ⁱ(k.a) = vₖ(a) if k ∈ [0;n[ -\begin{comment} -TODO: EXPLAIN -\end{comment} - -We proceed to show the correctness of the invariant by a case -analysys. - -Base cases: -1. [| ∅, (∅ → bbᵢ)ⁱ |] ≡ Leaf bbᵢ where i := min(I), that is a - decision tree [|ms|] defined by an empty accessor and empty - patterns pointing to blackboxes /bbᵢ/. This respects the invariant - because a decomposition matrix in the case of empty rows returns - the first expression and (Leaf bb)(v) := Match bb -2. [| (aⱼ)ʲ, ∅ |] ≡ Failure - -Regarding non base cases: -Let's first define -| let Idx(k) := [0; arity(k)[ -| let First(∅) := ⊥ -| let First((aⱼ)ʲ) := a_{min(j∈J≠∅)} -\[ -m := ((a_i)^i ((p_{ij})^i \to e_j)^{ij}) -\] -\[ - (k_k)^k := headconstructor(p_{i0})^i -\] -\begin{equation} - Groups(m) := ( k_k \to ((a)_{0.l})^{l \in Idx(k_k)} +++ (a_i)^{i \in I\backslash \{0\} }), \\ - ( if p_{0j} is k(q_l) then \\ - (qₗ)^{l \in Idx(k_k)} +++ (p_{ij})^{i \in I\backslash \{0\}} \to e_j \\ - if p_{0j} is \_ then \\ - (\_)^{l \in Idx(k_k)} +++ (p_{ij})^{i \in I\backslash \{0\}} \to e_j \\ - else \bot )^j ), \\ - ((a_i)^{i \in I\backslash \{0\}}, ((p_{ij})^{i \in I\backslash \{0\}} \to eⱼ if p_{0j} is \_ else \bot)^{j \in J}) -\end{equation} - -Groups(m) is an auxiliary function that decomposes a matrix m into -submatrices, according to the head constructor of their first pattern. -Groups(m) returns one submatrix m_r for each head constructor k that -occurs on the first row of m, plus one "wildcard submatrix" m_{wild} -that matches on all values that do not start with one of those head -constructors. - -Intuitively, m is equivalent to its decomposition in the following -sense: if the first pattern of an input vector (vᵢ)ⁱ starts with one -of the head constructors k, then running (vᵢ)ⁱ against m is the same -as running it against the submatrix mₖ; otherwise (its head -constructor is none of the k) it is equivalent to running it against -the wildcard submatrix. - -We formalize this intuition as follows: -Lemma (Groups): -Let \[m\] be a matrix with \[Groups(m) = (k_r \to m_r)^k, m_{wild}\]. -For any value vector \[(v_i)^l\] such that \[v_0 = k(v'_l)^l\] for some -constructor k, -we have: -\[ - if k = kₖ for some k then - m(vᵢ)ⁱ = mₖ((v'ₗ)ˡ +++ (vᵢ)^{i∈I\backslash \{0\}}) - else - m(vᵢ)ⁱ = m_{wild}(vᵢ)^{i∈I\backslash \{0\}} -\] - -*** Proof: -Let \[m\] be a matrix with \[Group(m) = (k_r \to m_r)^k, m_{wild}\]. -Let \[(v_i)^i\] be an input matrix with \[v_0 = k(v'_l)^l\] for some k. -We proceed by case analysis: -- either k is one of the kₖ for some k -- or k is none of the (kₖ)ᵏ - -Both m(vᵢ)ⁱ and mₖ(vₖ)ᵏ are defined as the first matching result of -a family over each row rⱼ of a matrix - -We know, from the definition of -Groups(m), that mₖ is -\[ - ((a){0.l})^{l∈Idx(kₖ)} +++ (aᵢ)^{i∈I\backslash \{0\}}), - ( - if p_{0j} is k(qₗ) then - (qₗ)ˡ +++ (p_{ij})^{i∈I\backslash \{0\}} → eⱼ - if p_{0j} is _ then - (_)ˡ +++ (p_{ij})^{i∈I\backslash \{0\}} → eⱼ - else ⊥ - )ʲ -\] - -By definition, m(vᵢ)ⁱ is -m(vᵢ)ⁱ = First(rⱼ(vᵢ)ⁱ)ʲ for m = ((aᵢ)ⁱ, (rⱼ)ʲ) -(pᵢ)ⁱ (vᵢ)ⁱ = { - if p₀ = k(qₗ)ˡ, v₀ = k'(v'ₖ)ᵏ, k=Idx(k') and l=Idx(k) - if k ≠ k' then ⊥ - if k = k' then ((qₗ)ˡ +++ (pᵢ)^{i∈I\backslash \{0\}}) ((v'ₖ)ᵏ +++ (vᵢ)^{i∈I\backslash \{0\}}) - if p₀ = (q₁|q₂) then - First( (q₁pᵢ^{i∈I \backslash \{0\}}) vᵢ^{i∈I \backslash \{0\}}, (q₂pᵢ^{i∈I \backslash \{0\}}) vᵢ^{i∈I \backslash \{0\}} ) -} - -For this reason, if we can prove that -| ∀j, rⱼ(vᵢ)ⁱ = r'ⱼ((v'ₖ)ᵏ ++ (vᵢ)ⁱ) -it follows that -| m(vᵢ)ⁱ = mₖ((v'ₖ)ᵏ ++ (vᵢ)ⁱ) -from the above definition. - -We can also show that aᵢ = a_{0.l}ˡ +++ a_{i∈I\backslash \{0\}} because v(a₀) = K(v(a){0.l})ˡ) - - - - -** Proof of equivalence checking \begin{comment} TODO: put ^i∈I where needed \end{comment} @@ -1493,17 +1538,10 @@ We prove this by induction on Cₜ: /a/ of the subtree Cᵢ and we define πᵢ' = πᵢ if a≠b else πᵢ∩π Trimming a switch node yields the following result: | Switch(b, (π→Cᵢ)^{i∈I})_{/a→π} := Switch(b, (π'ᵢ→C_{i/a→π})^{i∈I}) - \begin{comment} TODO: understand how to properly align lists check that every list is aligned \end{comment} -\begin{comment} -Actually in the proof.org file I transcribed: - e. Unreachabe → ⊥ -This is not correct because you don't have Unreachable nodes in target decision trees -\end{comment} - For the trimming lemma we have to prove that running the value vₜ against the decision tree Cₜ is the same as running vₜ against the tree C_{trim} that is the result of the trimming operation on Cₜ @@ -1548,7 +1586,6 @@ In the case where the algorithm returns No we have at least a couple of counter example values vₛ and vₜ for which the two decision trees outputs a different result. | equiv(S, Cₛ, Cₜ) = No(vₛ,vₜ) and cover(Cₜ, S) → ∀ vₛ ≃ vₜ∈S ∧ Cₛ(vₛ) ≠ Cₜ(vₜ) - We define the following | Forall(Yes) = Yes | Forall(Yes::l) = Forall(l) @@ -1592,12 +1629,12 @@ I think the unreachable case should go at the end. | vₛ≃vₜ∈S ∧ Cₛ(vₛ)≠Cₜ(vₜ) we can say that | equiv(Sᵢ, Cₛ, Cₜᵢ) = No(vₛ, vₜ) for some minimal k∈I -4. When we have a Switch on the right we define πₙ as the domain of +4. When we have a Switch on the right we define π_{fallback} as the domain of values not covered but the union of the constructors kᵢ - | πₙ = ¬(⋃π(kᵢ)^{i∈I}) + | π_{fallback} = ¬\bigcup\limits_{i∈I}π(kᵢ) The algorithm proceeds by trimming | equiv(S, Switch(a, (kᵢ → Cₛᵢ)^{i∈I}, C_{sf}), Cₜ) := - | Forall(equiv( S∩(a→π(kᵢ)^{i∈I}), Cₛᵢ, C_{t/a→π(kᵢ)})^{i∈I} +++ equiv(S∩(a→πₙ), Cₛ, C_{a→πₙ})) + | Forall(equiv( S∩(a→π(kᵢ)^{i∈I}), Cₛᵢ, C_{t/a→π(kᵢ)})^{i∈I} +++ equiv(S∩(a→πₙ), Cₛ, C_{a→π_{fallback}})) The statement still holds and we show this by first analyzing the /Yes/ case: | Forall(equiv( S∩(a→π(kᵢ)^{i∈I}), Cₛᵢ, C_{t/a→π(kᵢ)})^{i∈I} = Yes