diff --git a/tesi/conv.py b/tesi/conv.py index a2ef3e6..c99f990 100644 --- a/tesi/conv.py +++ b/tesi/conv.py @@ -7,7 +7,7 @@ try: except: allsymbols = json.load(open('../unicode-latex.json')) -mysymbols = ['≡', '≠', '≼', '→', '←', '⊀', '⋠', '≺', '∀', 'ε', '₀', '₂', '₁', '₃', '₄', 'ₐ', 'ₖ', 'ᵥ', 'ₘ', 'ₙ', 'ᵢ', 'ⁱ', '⋮', 'ₛ', 'ₜ', '≃', '⇔', '∧', '∅', 'ℕ', 'ⱼ', 'ʲ', '⊥', 'π', 'α', 'β', '∞', 'σ', '≤', '⊈', '∧', '∨', '∃', '⇒', '∩', '∉', '⋃', 'ᵏ', 'ₗ', 'ˡ', 'ₒ', 'ᵣ', 'ᴵ', '≈', '⊆' ] +mysymbols = ['≡', '≠', '≼', '→', '←', '⊀', '⋠', '≺', '∀', 'ε', '₀', '₂', '₁', '₃', '₄', 'ₐ', 'ₖ', 'ᵥ', 'ₘ', 'ₙ', 'ᵢ', 'ⁱ', '⋮', 'ₛ', 'ₜ', '≃', '⇔', '∧', '∅', 'ℕ', 'ⱼ', 'ʲ', '⊥', 'π', 'α', 'β', '∞', 'σ', '≤', '⊈', '∧', '∨', '∃', '⇒', '∩', '∉', '⋃', 'ᵏ', 'ₗ', 'ˡ', 'ₒ', 'ᵣ', 'ᴵ', '≈', '⊆', '↦' ] extrasymbols = {'〚': '\llbracket', '〛': r'\rrbracket', '̸': '\neg', '¬̸': '\neg', '∈': '\in ', 'ₛ': '_S', 'ₜ': '_T'} symbols = {s: allsymbols[s] for s in mysymbols} diff --git a/tesi/tesi.pdf b/tesi/tesi.pdf index 3a55c06..dca7974 100644 Binary files a/tesi/tesi.pdf and b/tesi/tesi.pdf differ diff --git a/tesi/tesi_unicode.org b/tesi/tesi_unicode.org index 9fd3215..5e6c6fa 100644 --- a/tesi/tesi_unicode.org +++ b/tesi/tesi_unicode.org @@ -300,7 +300,7 @@ simmetric to the correctness statement for the translation of source programs: The equivalence checking algorithm takes as input a domain of possible values \emph{S} and a pair of source and target decision trees and in case the two trees are not equivalent it returns a counter example. -The algorithm respects the following correctness statement: +Our algorithm respects the following correctness statement: \begin{comment} % TODO: we have to define what \coversTEX mean for readers to understand the specifications % (or we use a simplifying lie by hiding \coversTEX in the statements). @@ -868,38 +868,30 @@ We distinguish - Failure: a value matching this part results in an error - Leaf: a value matching this part results into the evaluation of a source black box of code - -The algorithm doesn't support type-declaration-based analysis +Our algorithm doesn't support type-declaration-based analysis to know the list of constructors at a given type. Let's consider some trivial examples: -#+BEGIN_SRC -function true -> 1 -#+END_SRC - +| function true -> 1 is translated to |Switch ([(true, Leaf 1)], Failure) while -#+BEGIN_SRC -function -true -> 1 -| false -> 2 -#+END_SRC +| function +| \vert{} true -> 1 +| \vert{} false -> 2 will be translated to -|Switch ([(true, Leaf 1); (false, Leaf 2)]) +|Switch ([(true, Leaf 1); (false, Leaf 2)]) It is possible to produce Unreachable examples by using refutation clauses (a "dot" in the right-hand-side) -#+BEGIN_SRC -function -true -> 1 -| false -> 2 -| _ -> . -#+END_SRC +|function +|\vert{} true -> 1 +|\vert{} false -> 2 +|\vert{} _ -> . that gets translated into -Switch ([(true, Leaf 1); (false, Leaf 2)], Unreachable) +| Switch ([(true, Leaf 1); (false, Leaf 2)], Unreachable) -We trust this annotation, which is reasonable as the type-checker +We trust this annotation, which is reasonable as the type-checker verifies that it indeed holds. Guard nodes of the tree are emitted whenever a guard is found. Guards @@ -911,7 +903,6 @@ true. \begin{comment} [ ] Finisci con Switch [ ] Spiega del fallback -[ ] rivedi compilazione per tenere in considerazione il tuo albero invece che le Lambda \end{comment} The source code of a pattern matching function has the @@ -946,8 +937,7 @@ This BNF grammar describes formally the grammar of the source program: | clause ::= lexpr "->" rexpr | lexpr ::= rule (ε\vert{}condition) | rexpr ::= _code ;; arbitrary code -| rule ::= wildcard\vert{}variable\vert{}constructor_pattern\vert{}or_pattern ;; -| ;; rules +| rule ::= wildcard\vert{}variable\vert{}constructor_pattern\vert or_pattern ;; | wildcard ::= "_" | variable ::= identifier | constructor_pattern ::= constructor (rule\vert{}ε) (assignment\vert{}ε) @@ -956,64 +946,6 @@ This BNF grammar describes formally the grammar of the source program: | condition ::= "when" bexpr | assignment ::= "as" id | bexpr ::= _code ;; arbitrary code - -A source program tₛ is a collection of pattern clauses pointing to -/bb/ terms. Running a program tₛ against an input value vₛ produces as -a result /r/: -| tₛ ::= (p → bb)^{i∈I} -| p ::= \vert K(pᵢ)ⁱ, i ∈ I \vert (p\vert{}q) \vert n ∈ ℕ -| r ::= guard list * (Match bb \vert{} NoMatch \vert{} Absurd) -| tₛ(vₛ) → r - -TODO: argument on what it means to run a source program - -/guard/ and /bb/ expressions are treated as blackboxes of OCaml code. -A sound approach for treating these blackboxes would be to inspect the -OCaml compiler during translation to Lambda code and extract the -blackboxes compiled in their Lambda representation. -This would allow to test for equality with the respective blackbox at -the target level. -Given that this level of introspection is currently not possibile, we -decided to restrict the structure of blackboxes to the following (valid) OCaml -code: - -#+BEGIN_SRC -external guard : 'a -> 'b = "guard" -external observe : 'a -> 'b = "observe" -#+END_SRC - -We assume these two external functions /guard/ and /observe/ with a valid -type that lets the user pass any number of arguments to them. -All the guards are of the form \texttt{guard }, where the - are expressed using the OCaml pattern matching language. -Similarly, all the right-hand-side expressions are of the form -\texttt{observe ...} with the same constraints on arguments. -#+BEGIN_SRC -type t = K1 | K2 of t (* declaration of an algebraic and recursive datatype t *) - -let _ = function - | K1 -> observe 0 - | K2 K1 -> observe 1 - | K2 x when guard x -> observe 2 (* guard inspects the x variable *) - | K2 (K2 x) as y when guard x y -> observe 3 - | K2 _ -> observe 4 -#+END_SRC -We note that the right hand side of /observe/ is just an arbitrary -value and in this case just enumerates the order in which expressions -appear. -Even if this is an oversimplification of the problem for the -prototype, it must be noted that at the compiler level we have the -possibility to compile the pattern clauses in two separate steps so -that the guards and right-hand-side expressions are semantically equal -to their counterparts at the target program level. -\begin{lstlisting} -let _ = function - | K1 -> lambda₀ - | K2 K1 -> lambda₁ - | K2 x when lambda-guard₀ -> lambda₂ - | K2 (K2 x) as y when lambda-guard₁ -> lambda₃ - | K2 _ -> lambda₄ -\end{lstlisting} The source program is parsed using the ocaml-compiler-libs library. The result of parsing, when successful, results in a list of clauses and a list of type declarations. @@ -1085,6 +1017,87 @@ We can define the following three relations with respect to patterns: are the same - Patterns p and q are compatible when they share a common instance +Wit the support of two auxiliary functions +- tail of an ordered family + | tail((xᵢ)^{i ∈ I}) := (xᵢ)^{i ≠ min(I)} +- first non-⊥ element of an ordered family + | First((xᵢ)ⁱ) := ⊥ \quad \quad \quad if ∀i, xᵢ = ⊥ + | First((xᵢ)ⁱ) := x_min{i \vert{} xᵢ ≠ ⊥} \quad if ∃i, xᵢ ≠ ⊥ +we now define what it means to run a pattern row against a value +vector of the same length, that is (pᵢ)ⁱ(vᵢ)ⁱ +| pᵢ | vᵢ | result_{pat} | +|--------------------------+----------------------+-------------------------------------------------| +| ∅ | (∅) | [] | +| (_, tail(pᵢ)ⁱ) | (vᵢ) | tail(pᵢ)ⁱ(tail(vᵢ)ⁱ) | +| (x, tail(pᵢ)ⁱ) | (vᵢ) | σ[x↦v₀] if tail(pᵢ)ⁱ(tail(vᵢ)ⁱ) = σ | +| (K(qⱼ)ʲ, tail(pᵢ)ⁱ) | (K(v'ⱼ)ʲ,tail(vⱼ)ʲ) | ((qⱼ)ʲ +++ tail(pᵢ)ⁱ)((v'ⱼ)ʲ +++ tail(vᵢ)ⁱ) | +| (K(qⱼ)ʲ, tail(pᵢ)ⁱ) | (K'(v'ₗ)ˡ,tail(vⱼ)ʲ) | ⊥ if K ≠ K' | +| (q₁\vert{}q₂, tail(pᵢ)ⁱ) | (vᵢ)ⁱ | First((q₁,tail(pᵢ)ⁱ)(vᵢ)ⁱ, (q₂,tail(pᵢ)ⁱ)(vᵢ)ⁱ) | + +A source program tₛ is a collection of pattern clauses pointing to +/bb/ terms. Running a program tₛ against an input value vₛ, written +tₛ(vₛ) produces a result /r/ belonging to the following grammar: +| tₛ ::= (p → bb)^{i∈I} +| tₛ(vₛ) → r +| r ::= guard list * (Match bb \vert{} NoMatch \vert{} Absurd) +\begin{comment} +TODO: understand how to explain guard +\end{comment} +We can define what it means to run an input value vₛ against a source +program tₛ: +| tₛ(vₛ) := ⊥ \quad \quad \quad if ∀i, pᵢ(vₛ) = ⊥ +| First((xᵢ)ⁱ) := x_min{i \vert{} xᵢ ≠ ⊥} \quad if ∃i, xᵢ ≠ ⊥ +| tₛ(vₛ) = Absurd if bb_min{pᵢ → bbᵢ \vert{} pᵢ(vₛ) ≠ ⊥} = /refutation clause/ +| tₛ(vₛ) = Match bb_min{pᵢ → bbᵢ \vert{} pᵢ(vₛ) ≠ ⊥} +/guard/ and /bb/ expressions are treated as blackboxes of OCaml code. +A sound approach for treating these blackboxes would be to inspect the +OCaml compiler during translation to Lambda code and extract the +blackboxes compiled in their Lambda representation. +This would allow to test for equality with the respective blackbox at +the target level. +Given that this level of introspection is currently not possibile, we +decided to restrict the structure of blackboxes to the following (valid) OCaml +code: + +#+BEGIN_SRC +external guard : 'a -> 'b = "guard" +external observe : 'a -> 'b = "observe" +#+END_SRC + +We assume these two external functions /guard/ and /observe/ with a valid +type that lets the user pass any number of arguments to them. +All the guards are of the form \texttt{guard }, where the + are expressed using the OCaml pattern matching language. +Similarly, all the right-hand-side expressions are of the form +\texttt{observe ...} with the same constraints on arguments. +#+BEGIN_SRC +(* declaration of an algebraic and recursive datatype t *) +type t = K1 | K2 of t + +let _ = function + | K1 -> observe 0 + | K2 K1 -> observe 1 + | K2 x when guard x -> observe 2 (* guard inspects the x variable *) + | K2 (K2 x) as y when guard x y -> observe 3 + | K2 _ -> observe 4 +#+END_SRC +We note that the right hand side of /observe/ is just an arbitrary +value and in this case just enumerates the order in which expressions +appear. +Even if this is an oversimplification of the problem for the +prototype, it must be noted that at the compiler level we have the +possibility to compile the pattern clauses in two separate steps so +that the guards and right-hand-side expressions are semantically equal +to their counterparts at the target program level. +\begin{lstlisting} +let _ = function + | K1 -> lambda₀ + | K2 K1 -> lambda₁ + | K2 x when lambda-guard₀ -> lambda₂ + | K2 (K2 x) as y when lambda-guard₁ -> lambda₃ + | K2 _ -> lambda₄ +\end{lstlisting} + \subsubsection{Matrix decomposition of pattern clauses} We define a new object, the /clause matrix/ P → L of size |m x n+1| that associates pattern vectors $\vec{p_i}$ to lambda code action lᵢ. @@ -1236,7 +1249,8 @@ q_{1,1} & p_{1,2} & \cdots & p_{1,n} → l₁ \\ \end{pmatrix}\big\rvert = n \end{equation*} In our prototype we make use of accessors to encode stored values. -\begin{minipage}{0.2\linewidth} + +\begin{minipage}{0.6\linewidth} \begin{verbatim} let value = 1 :: 2 :: 3 :: [] @@ -1333,19 +1347,19 @@ Let's first define some auxiliary functions - definition of group decomposition: | let constrs((pᵢ)^{i ∈ I}) = { K \vert{} K = constr(pᵢ), i ∈ I } | let Groups(m) where m = ((aᵢ)ⁱ ((pᵢⱼ)ⁱ → eⱼ)ⁱʲ) = -| \quad let (Kₖ)ᵏ = constrs(pᵢ₀)ⁱ in -| \quad ( Kₖ → -| \quad \quad ((a₀.ₗ)ˡ +++ tail(aᵢ)ⁱ) -| \quad \quad ( -| \quad \quad if pₒⱼ is Kₖ(qₗ) then -| \quad \quad \quad (qₗ)ˡ +++ tail(pᵢⱼ)ⁱ → eⱼ -| \quad \quad if pₒⱼ is _ then -| \quad \quad \quad (_)ˡ +++ tail(pᵢⱼ)ⁱ → eⱼ -| \quad \quad else ⊥ -| \quad \quad )ʲ -| \quad ), ( -| \quad \quad tail(aᵢ)ⁱ, (tail(pᵢⱼ)ⁱ → eⱼ if p₀ⱼ is _ else ⊥)ʲ -| \quad ) +| \quad \quad let (Kₖ)ᵏ = constrs(pᵢ₀)ⁱ in +| \quad \quad ( Kₖ → +| \quad \quad \quad \quad ((a₀.ₗ)ˡ +++ tail(aᵢ)ⁱ) +| \quad \quad \quad \quad ( +| \quad \quad \quad \quad if pₒⱼ is Kₖ(qₗ) then +| \quad \quad \quad \quad \quad \quad (qₗ)ˡ +++ tail(pᵢⱼ)ⁱ → eⱼ +| \quad \quad \quad \quad if pₒⱼ is _ then +| \quad \quad \quad \quad \quad \quad (_)ˡ +++ tail(pᵢⱼ)ⁱ → eⱼ +| \quad \quad \quad \quad else ⊥ +| \quad \quad \quad \quad )ʲ +| \quad \quad ), ( +| \quad \quad \quad \quad tail(aᵢ)ⁱ, (tail(pᵢⱼ)ⁱ → eⱼ if p₀ⱼ is _ else ⊥)ʲ +| \quad \quad ) Groups(m) is an auxiliary function that decomposes a matrix m into submatrices, according to the head constructor of their first pattern. Groups(m) returns one submatrix m_r for each head constructor K that @@ -1356,7 +1370,7 @@ Intuitively, m is equivalent to its decomposition in the following sense: if the first pattern of an input vector (v_i)^i starts with one of the head constructors Kₖ, then running (v_i)^i against m is the same as running it against the submatrix m_{Kₖ}; otherwise (its head -constructor is none of the Kₖ) it is equivalent to running it against +constructor ∉ (Kₖ)ᵏ) it is equivalent to running it against the wildcard submatrix. We formalize this intuition as follows @@ -1500,7 +1514,7 @@ respective decision tree produces the same result The equivalence checking algorithm takes as input a domain of possible values \emph{S} and a pair of source and target decision trees and in case the two trees are not equivalent it returns a counter example. -The algorithm respects the following correctness statement: +Our algorithm respects the following correctness statement: \begin{comment} TODO: we have to define what \coversTEX mean for readers to understand the specifications @@ -1618,11 +1632,10 @@ We say that Cₜ covers the input space /S/, written /covers(Cₜ, S)/ when every value vₛ∈S is a valid input to the decision tree Cₜ. (TODO: rephrase) Given an input space /S/ and a couple of decision trees, where -the target decision tree Cₜ covers the input space /S/, we say that -the two decision trees are equivalent when: +the target decision tree Cₜ covers the input space /S/ we can define equivalence: | equiv(S, Cₛ, Cₜ, gs) = Yes ∧ covers(Cₜ, S) → ∀vₛ≃vₜ ∈ S, Cₛ(vₛ) ≃gs Cₜ(vₜ) Similarly we say that a couple of decision trees in the presence of -an input space /S/ are /not/ equivalent when: +an input space /S/ are /not/ equivalent in the following way: | equiv(S, Cₛ, Cₜ, gs) = No(vₛ,vₜ) ∧ covers(Cₜ, S) → vₛ≃vₜ ∈ S ∧ Cₛ(vₛ) ≠gs Cₜ(vₜ) Corollary: For a full input space /S/, that is the universe of the target program: @@ -1727,7 +1740,7 @@ I think the unreachable case should go at the end. 3. When we have a Leaf or a Failure at the left side: | equiv(S, Failure as Cₛ, Switch(a, (πᵢ → Cₜᵢ)^{i∈I})) := Forall(equiv( S∩a→π(kᵢ)), Cₛ, Cₜᵢ)^{i∈I}) | equiv(S, Leaf bbₛ as Cₛ, Switch(a, (πᵢ → Cₜᵢ)^{i∈I})) := Forall(equiv( S∩a→π(kᵢ)), Cₛ, Cₜᵢ)^{i∈I}) - The algorithm either returns Yes for every sub-input space Sᵢ := S∩(a→π(kᵢ)) and + Our algorithm either returns Yes for every sub-input space Sᵢ := S∩(a→π(kᵢ)) and subtree Cₜᵢ | equiv(Sᵢ, Cₛ, Cₜᵢ) = Yes ∀i or we have a counter example vₛ, vₜ for which @@ -1740,7 +1753,7 @@ I think the unreachable case should go at the end. 4. When we have a Switch on the right we define π_{fallback} as the domain of values not covered but the union of the constructors kᵢ | π_{fallback} = ¬\bigcup\limits_{i∈I}π(kᵢ) - The algorithm proceeds by trimming + Our algorithm proceeds by trimming | equiv(S, Switch(a, (kᵢ → Cₛᵢ)^{i∈I}, C_{sf}), Cₜ) := | Forall(equiv( S∩(a→π(kᵢ)^{i∈I}), Cₛᵢ, C_{t/a→π(kᵢ)})^{i∈I} +++ equiv(S∩(a→πₙ), Cₛ, C_{a→π_{fallback}})) The statement still holds and we show this by first analyzing the