diff --git a/tesi/.gitignore b/tesi/.gitignore index 0df34f0..5ff4fc3 100644 --- a/tesi/.gitignore +++ b/tesi/.gitignore @@ -1,4 +1,5 @@ tesi.tex +.#tesi.tex ## Core latex/pdflatex auxiliary files: *.aux diff --git a/tesi/conv.py b/tesi/conv.py index 3206322..7a2ab89 100644 --- a/tesi/conv.py +++ b/tesi/conv.py @@ -3,7 +3,7 @@ import re from sys import argv allsymbols = json.load(open('./unicode-latex.json')) -mysymbols = ['≡', '≼', '→', '⊀', '⋠', '≺', '∀', '∈', '₂', '₁', 'ₐ', 'ₘ', 'ₙ', 'ᵢ', 'ⁱ'] +mysymbols = ['≡', '≠', '≼', '→', '←', '⊀', '⋠', '≺', '∀', '∈', '₀', '₂', '₁', '₃', 'ₐ', 'ₖ', 'ₘ', 'ₙ', 'ᵢ', 'ⁱ', '⋮'] symbols = {s: allsymbols[s] for s in mysymbols} mathsymbols = {s: '$'+allsymbols[s]+'$' for s in symbols} @@ -18,10 +18,11 @@ def read_by_char(fname): for line in fp.readlines(): cnt += 1 words = [w.strip() for w in line.split(' ')] + if mathmode_begin.intersection(words): assert mathmode == False mathmode = True - elif mathmode_end.intersection(words): + if mathmode_end.intersection(words): assert mathmode == True, f'Line: {words}, number: {cnt}' mathmode = False diff --git a/tesi/tesi.pdf b/tesi/tesi.pdf index f8637f1..9d07d91 100644 Binary files a/tesi/tesi.pdf and b/tesi/tesi.pdf differ diff --git a/tesi/tesi_unicode.org b/tesi/tesi_unicode.org index e2f29a2..df058c0 100644 --- a/tesi/tesi_unicode.org +++ b/tesi/tesi_unicode.org @@ -1,13 +1,14 @@ -* TODO Scaletta [1/2] +\begin{comment} +* TODO Scaletta [1/4] - [X] Abstract - - [-] Background [20%] + - [-] Background [40%] - [X] Ocaml - [ ] Lambda code [0%] - [ ] Untyped lambda form - [ ] OCaml specific instructions - - [-] Pattern matching [50%] + - [X] Pattern matching [100%] - [X] Introduzione - - [ ] Compilation to lambda form + - [X] Compilation to lambda form - [ ] Translation Verification - [ ] Symbolic execution - [ ] Translation verification of the Pattern Matching Compiler @@ -18,8 +19,9 @@ - [ ] Formal Grammar - [ ] Symbolic execution of the lambda target - [ ] Equivalence between source and target - - [ ] Practical results + - [ ] Practical results +\end{comment} #+TITLE: Translation Verification of the OCaml pattern matching compiler #+AUTHOR: Francesco Mecca @@ -28,8 +30,8 @@ #+LANGUAGE: en #+LaTeX_CLASS: article -#+LaTeX_HEADER: \usepackage[utf8]{inputenc} #+LaTeX_HEADER: \usepackage{algorithm} +#+LaTeX_HEADER: \usepackage{comment} #+LaTeX_HEADER: \usepackage{algpseudocode} #+LaTeX_HEADER: \usepackage{amsmath,amssymb,amsthm} #+Latex_HEADER: \newtheorem{definition}{Definition} @@ -102,96 +104,120 @@ inheritance, subtyping and dynamic binding, and modules, that provide a way to encapsulate definitions. Modules are checked statically and can be reificated through functors. -*** TODO Pattern matching [37%] -- [ ] capisci come mettere gli esempi uno accanto all'altro +*** Pattern matching Pattern matching is a widely adopted mechanism to interact with ADT. C family languages provide branching on predicates through the use of if statements and switch statements. -Pattern matching is a mechanism for destructuring and analyzing data -structures for the presence of values simbolically represented as -tokens. One common example of pattern matching is the use of regular -expressions on strings. OCaml provides pattern matching on ADT, +Pattern matching on the other hands express predicates through +syntactic templates that also allow to bind on data structures of +arbitrary shapes. One common example of pattern matching is the use of regular +expressions on strings. OCaml provides pattern matching on ADT and primitive data types. +The result of a pattern matching operation is always one of: +- this value does not match this pattern” +- this value matches this pattern, resulting the following bindings of + names to values and the jump to the expression pointed at the + pattern. -- [X] Esempio enum, C e Ocaml #+BEGIN_SRC ocaml -type color = | Red | Blue | Green +type color = | Red | Blue | Green | Black | White -begin match color with +match color with | Red -> print "red" | Blue -> print "red" | Green -> print "red" - +| _ -> print "white or black" #+END_SRC -OCaml provides tokens to express data destructoring +OCaml provides tokens to express data destructoring. +For example we can examine the content of a list with patten matching - -- [X] Esempio destructor list #+BEGIN_SRC ocaml begin match list with | [ ] -> print "empty list" | element1 :: [ ] -> print "one element" -| element1 :: element2 :: [ ] -> print "two elements" +| (element1 :: element2) :: [ ] -> print "two elements" | head :: tail-> print "head followed by many elements" #+END_SRC -- [X] Esempio destructor tuples +Parenthesized patterns, such as the third one in the previous example, +matches the same value as the pattern without parenthesis. + +The same could be done with tuples #+BEGIN_SRC ocaml begin match tuple with | (Some _, Some _) -> print "Pair of optional types" -| (Some _, None) -> print "Pair of optional types, last null" -| (None, Some _) -> print "Pair of optional types, first null" +| (Some _, None) | (None, Some _) -> print "Pair of optional types, one of which is null" | (None, None) -> print "Pair of optional types, both null" #+END_SRC -Pattern clauses can make the use of /guards/ to test predicates and -variables can be binded in scope. +The pattern pattern₁ | pattern₂ represents the logical "or" of the +two patterns pattern₁ and pattern₂. A value matches pattern₁ | +pattern₂ if it matches pattern₁ or pattern₂. + +Pattern clauses can make the use of /guards/ to test predicates and +variables can captured (binded in scope). -- [ ] Esempio binding e guards #+BEGIN_SRC ocaml begin match token_list with -| "switch"::var::"{"::rest -> -| "case"::":"::var::rest when is_int var -> -| "case"::":"::var::rest when is_string var -> -| "}"::[ ] -> stop () +| "switch"::var::"{"::rest -> ... +| "case"::":"::var::rest when is_int var -> ... +| "case"::":"::var::rest when is_string var -> ... +| "}"::[ ] -> ... | "}"::rest -> error "syntax error: " rest #+END_SRC -- [ ] Un altro esempio con destructors e tutto i lresto +Moreover, the OCaml pattern matching compiler emits a warning when a +pattern is not exhaustive or some patterns are shadowed by precedent ones. In general pattern matching on primitive and algebraic data types takes the following form. -- [ ] Esempio informale +|match variable with +|\vert pattern₁ -> expr₁ +|\vert pattern₂ when guard -> expr₂ +|\vert pattern₃ as var -> expr₃ +|⋮ +|\vert pₙ -> exprₙ It can be described more formally through a BNF grammar. -- [ ] BNF +#+BEGIN_SRC bnf -- [ ] Come funziona il pattern matching? +pattern ::= value-name +| _ ;; wildcard pattern +| constant ;; matches a constant value +| pattern as value-name ;; binds to value-name +| ( pattern ) ;; parenthesized pattern +| pattern | pattern ;; or-pattern +| constr pattern ;; variant pattern +| [ pattern { ; pattern } [ ; ] ] ;; list patterns +| pattern :: pattern ;; lists patterns using cons operator (::) +| [| pattern { ; pattern } [ ; ] |] ;; array pattern +| char-literal .. char-literal ;; match on a range of characters +| { field [: typexpr] [= pattern] { ; field [: typexpr] [= pattern] } \ + [; _ ] [ ; ] } ;; patterns that match on records -*** TODO 1.2.1 Pattern matching compilation to lambda code +#+END_SRC -- [ ] Da tabella a matrice -Formally, pattern are defined as follows: -| pattern | Patterns | +*** 1.2.1 Pattern matching compilation to lambda code + +During compilation, patterns are in the form +| pattern | type of pattern | |-----------------+---------------------| | _ | wildcard | | x | variable | | c(p₁,p₂,...,pₙ) | constructor pattern | | (p₁\vert p₂) | or-pattern | -Values are defined as follows: -| values | Values | -|---------------------+-------------------| -| c(v₁, v₂, ..., vₙ) | constructor value | +Expressions are compiled into lambda code and are referred as lambda +code actions. The entire pattern matching code can be represented as a clause matrix that associates rows of patterns (p_{i,1}, p_{i,2}, ..., p_{i,n}) to @@ -220,7 +246,6 @@ called a constant constructor. The pattern /p/ matches a value /v/, written as p ≼ v, when one of the following rules apply -| | | | | |--------------------+---+--------------------+-------------------------------------------| | _ | ≼ | v | ∀v | | x | ≼ | v | ∀v | @@ -229,9 +254,9 @@ following rules apply | (p₁, p₂, ..., pₐ) | ≼ | (v₁, v₂, ..., vₐ) | iff pᵢ ≼ vᵢ ∀i ∈ [1..a] | |--------------------+---+--------------------+-------------------------------------------| -We can also say that /v/ is an /instance/ of /p/. +When a value /v/ matches pattern /p/ we say that /v/ is an /instance/ of /p/. -When we consider the pattern matrix P we say that the value vector +Considering the pattern matrix P we say that the value vector $\vec{v}$ = (v₁, v₂, ..., vᵢ) matches the line number i in P if and only if the following two conditions are satisfied: - p_{i,1}, p_{i,2}, \cdots, p_{i,n} ≼ (v₁, v₂, ..., vᵢ) @@ -244,28 +269,129 @@ We can define the following three relations with respect to patterns: are the same - Patterns p and q are compatible when they share a common instance -** 1.2.1.1 Initial state of the compilation +**** Initial state of the compilation Given a source of the following form: -#+BEGIN_SRC ocaml -match x with -| p₁ -> e₁ -| p₂ -> e₂ -... -| pₘ -> eₘ -#+END_SRC ocaml -the initial input of the algorithm consists of a vector of variables -$\vec{x}$ = (x₁, x₂, ..., xₙ) of size n -and a clause matrix P → L of width n and height m. +|match variable with +|\vert pattern₁ -> e₁ +|\vert pattern₂ -> e₂ +|⋮ +|\vert pₘ -> eₘ + +the initial input of the algorithm C consists of a vector of variables +$\vec{x}$ = (x₁, x₂, ..., xₙ) of size /n/ where /n/ is the arity of +the type of /x/ and a clause matrix P → L of width n and height m. +That is: \begin{equation*} -(P → L) = +C((\vec{x} = (x₁, x₂, ..., xₙ), \begin{pmatrix} p_{1,1} & p_{1,2} & \cdots & p_{1,n} → l₁ \\ p_{2,1} & p_{2,2} & \cdots & p_{2,n} → l₂ \\ -\vdots & \vdots & \ddots \vdots → \vdots \\ -p_{m,1} & p_{m,2} & \cdots & p_{m,n} → lₘ +\vdots & \vdots & \ddots & \vdots → \vdots \\ +p_{m,1} & p_{m,2} & \cdots & p_{m,n} → lₘ) \end{pmatrix} \end{equation*} + +The base case C₀ of the algorithm is the case in which the $\vec{x}$ +is empty, that is $\vec{x}$ = (), then the result of the compilation +C₀ is l₁ +\begin{equation*} +C₀((), +\begin{pmatrix} +→ l₁ \\ +→ l₂ \\ +→ \vdots \\ +→ lₘ +\end{pmatrix}) +) = l₁ +\end{equation*} + +When $\vec{x}$ ≠ () then the compilation advances using one of the +following four rules: + +1) Variable rule: if all patterns of the first column of P are wildcard patterns or + bind the value to a variable, then + + \begin{equation*} + C(\vec{x}, P → L) = C((x₂, x₃, ..., xₙ), P' → L') + \end{equation*} + where + \begin{equation*} + \begin{pmatrix} + p_{1,2} & \cdots & p_{1,n} & → & (let & y₁ & x₁) & l₁ \\ + p_{2,2} & \cdots & p_{2,n} & → & (let & y₂ & x₁) & l₂ \\ + \vdots & \ddots & \vdots & → & \vdots & \vdots & \vdots & \vdots \\ + p_{m,2} & \cdots & p_{m,n} & → & (let & yₘ & x₁) & lₘ + \end{pmatrix} + \end{equation*} + + That means in every lambda action lᵢ there is a binding of x₁ to the + variable that appears on the pattern $p_{i,1}. Bindings are omitted + for wildcard patterns and the lambda action lᵢ remains unchanged. + +2) Constructor rule: if all patterns in the first column of P are + constructors patterns of the form k(q₁, q₂, ..., qₙ) we define a + new matrix, the specialized clause matrix S, by applying the + following transformation on every row /p/: + \begin{lstlisting}[mathescape,columns=fullflexible,basicstyle=\fontfamily{lmvtt}\selectfont,] + for every c ∈ Set of constructors + for i ← 1 .. m + let kᵢ ← constructor_of($p_{i,1}$) + if kᵢ = c then + p ← $q_{i,1}$, $q_{i,2}$, ..., $q_{i,n'}$, $p_{i,2}$, $p_{i,3}$, ..., $p_{i, n}$ + \end{lstlisting} + Patterns of the form $q_{i,j}$ matches on the values of the + constructor and we define new fresh variables y₁, y₂, ..., yₐ so + that the lambda action lᵢ becomes + +\begin{lstlisting}[mathescape,columns=fullflexible,basicstyle=\fontfamily{lmvtt}\selectfont,] + (let (y₁ (field 0 x₁)) + (y₂ (field 1 x₁)) + ... + (yₐ (field (a-1) x₁)) + lᵢ) +\end{lstlisting} + + and the result of the compilation for the set of constructors + {c₁, c₂, ..., cₖ} is: + +\begin{lstlisting}[mathescape,columns=fullflexible,basicstyle=\fontfamily{lmvtt}\selectfont,] + switch x₁ with + case c₁: l₁ + case c₂: l₂ + ... + case cₖ: lₖ + default: exit +\end{lstlisting} + +3) Orpat rule: there are various strategies for dealing with + or-patterns. The most naive one is to split the or-patterns. + For example a row p containing an or-pattern: + \begin{equation*} + (p_{i,1}|q_{i,1}|r_{i,1}), p_{i,2}, ..., p_{i,m} → lᵢ + \end{equation*} + results in three rows added to the clause matrix + \begin{equation*} + p_{i,1}, p_{i,2}, ..., p_{i,m} → lᵢ \\ + \end{equation*} + \begin{equation*} + q_{i,1}, p_{i,2}, ..., p_{i,m} → lᵢ \\ + \end{equation*} + \begin{equation*} + r_{i,1}, p_{i,2}, ..., p_{i,m} → lᵢ + \end{equation*} +4) Mixture rule: + When none of the previous rules apply the clause matrix P → L is + splitted into two clause matrices, the first P₁ → L₁ that is the + largest prefix matrix for which one of the three previous rules + apply, and P₂ → L₂ containing the remaining rows. The algorithm is + applied to both matrices. + +\begin{comment} +#+BEGIN_COMMENT +CITE paper? +#+END_COMMENT’ +\end{comment} diff --git a/tesi/tesi_unicode.tex b/tesi/tesi_unicode.tex index ad81c61..050fe9e 100644 --- a/tesi/tesi_unicode.tex +++ b/tesi/tesi_unicode.tex @@ -1,4 +1,4 @@ -% Created 2020-02-24 Mon 14:37 +% Created 2020-02-24 Mon 19:44 % Intended LaTeX compiler: pdflatex \documentclass[11pt]{article} \usepackage[utf8]{inputenc} @@ -14,8 +14,8 @@ \usepackage{amssymb} \usepackage{capt-of} \usepackage{hyperref} -\usepackage[utf8]{inputenc} \usepackage{algorithm} +\usepackage{comment} \usepackage{algpseudocode} \usepackage{amsmath,amssymb,amsthm} \newtheorem{definition}{Definition} @@ -35,11 +35,12 @@ \begin{document} \maketitle -\section{{\bfseries\sffamily TODO} Scaletta [1/2]} -\label{sec:org8ebbc9e} +\begin{comment} +\section{{\bfseries\sffamily TODO} Scaletta [1/4]} +\label{sec:orgdce55fc} \begin{itemize} \item[{$\boxtimes$}] Abstract -\item[{$\boxminus$}] Background [20\%] +\item[{$\boxminus$}] Background [40\%] \begin{itemize} \item[{$\boxtimes$}] Ocaml \item[{$\square$}] Lambda code [0\%] @@ -47,10 +48,10 @@ \item[{$\square$}] Untyped lambda form \item[{$\square$}] OCaml specific instructions \end{itemize} -\item[{$\boxminus$}] Pattern matching [50\%] +\item[{$\boxtimes$}] Pattern matching [100\%] \begin{itemize} \item[{$\boxtimes$}] Introduzione -\item[{$\square$}] Compilation to lambda form +\item[{$\boxtimes$}] Compilation to lambda form \end{itemize} \item[{$\square$}] Translation Verification \item[{$\square$}] Symbolic execution @@ -69,11 +70,10 @@ \end{itemize} \item[{$\square$}] Equivalence between source and target \end{itemize} -\begin{itemize} \item[{$\square$}] Practical results \end{itemize} -\end{itemize} +\end{comment} \begin{abstract} @@ -92,10 +92,10 @@ blackboxes that are not evaluated in the context of the verification. \end{abstract} \section{Background} -\label{sec:org73f49d4} +\label{sec:orgf336654} \subsection{OCaml} -\label{sec:orga0f2160} +\label{sec:org885a2ca} Objective Caml (OCaml) is a dialect of the ML (Meta-Language) family of programming languages. OCaml shares many features with other dialects of ML, such as SML and Caml Light, @@ -142,108 +142,122 @@ provide a way to encapsulate definitions. Modules are checked statically and can be reificated through functors. \begin{enumerate} -\item {\bfseries\sffamily TODO} Pattern matching [37\%] -\label{sec:org8bcefd9} -\begin{itemize} -\item[{$\square$}] capisci come mettere gli esempi uno accanto all'altro -\end{itemize} +\item Pattern matching +\label{sec:org879bb72} Pattern matching is a widely adopted mechanism to interact with ADT. C family languages provide branching on predicates through the use of if statements and switch statements. -Pattern matching is a mechanism for destructuring and analyzing data -structures for the presence of values simbolically represented as -tokens. One common example of pattern matching is the use of regular -expressions on strings. OCaml provides pattern matching on ADT, +Pattern matching on the other hands express predicates through +syntactic templates that also allow to bind on data structures of +arbitrary shapes. One common example of pattern matching is the use of regular +expressions on strings. OCaml provides pattern matching on ADT and primitive data types. - +The result of a pattern matching operation is always one of: \begin{itemize} -\item[{$\boxtimes$}] Esempio enum, C e Ocaml +\item this value does not match this pattern” +\item this value matches this pattern, resulting the following bindings of +names to values and the jump to the expression pointed at the +pattern. \end{itemize} -\begin{verbatim} -type color = | Red | Blue | Green -begin match color with +\begin{verbatim} +type color = | Red | Blue | Green | Black | White + +match color with | Red -> print "red" | Blue -> print "red" | Green -> print "red" - +| _ -> print "white or black" \end{verbatim} -OCaml provides tokens to express data destructoring +OCaml provides tokens to express data destructoring. +For example we can examine the content of a list with patten matching - -\begin{itemize} -\item[{$\boxtimes$}] Esempio destructor list -\end{itemize} \begin{verbatim} begin match list with | [ ] -> print "empty list" | element1 :: [ ] -> print "one element" -| element1 :: element2 :: [ ] -> print "two elements" +| (element1 :: element2) :: [ ] -> print "two elements" | head :: tail-> print "head followed by many elements" \end{verbatim} -\begin{itemize} -\item[{$\boxtimes$}] Esempio destructor tuples -\end{itemize} +Parenthesized patterns, such as the third one in the previous example, +matches the same value as the pattern without parenthesis. + +The same could be done with tuples \begin{verbatim} begin match tuple with | (Some _, Some _) -> print "Pair of optional types" -| (Some _, None) -> print "Pair of optional types, last null" -| (None, Some _) -> print "Pair of optional types, first null" +| (Some _, None) | (None, Some _) -> print "Pair of optional types, one of which is null" | (None, None) -> print "Pair of optional types, both null" \end{verbatim} -Pattern clauses can make the use of \emph{guards} to test predicates and -variables can be binded in scope. +The pattern pattern₁ | pattern₂ represents the logical "or" of the +two patterns pattern₁ and pattern₂. A value matches pattern₁ | +pattern₂ if it matches pattern₁ or pattern₂. + +Pattern clauses can make the use of \emph{guards} to test predicates and +variables can captured (binded in scope). -\begin{itemize} -\item[{$\square$}] Esempio binding e guards -\end{itemize} \begin{verbatim} begin match token_list with -| "switch"::var::"{"::rest -> -| "case"::":"::var::rest when is_int var -> -| "case"::":"::var::rest when is_string var -> -| "}"::[ ] -> stop () +| "switch"::var::"{"::rest -> ... +| "case"::":"::var::rest when is_int var -> ... +| "case"::":"::var::rest when is_string var -> ... +| "}"::[ ] -> ... | "}"::rest -> error "syntax error: " rest \end{verbatim} -\begin{itemize} -\item[{$\square$}] Un altro esempio con destructors e tutto i lresto -\end{itemize} +Moreover, the OCaml pattern matching compiler emits a warning when a +pattern is not exhaustive or some patterns are shadowed by precedent ones. In general pattern matching on primitive and algebraic data types takes the following form. -\begin{itemize} -\item[{$\square$}] Esempio informale -\end{itemize} +\begin{center} +\begin{tabular}{l} +match variable with\\ +\(\vert{}\) pattern₁ -> expr₁\\ +\(\vert{}\) pattern₂ when guard -> expr₂\\ +\(\vert{}\) pattern₃ as var -> expr₃\\ +⋮\\ +\(\vert{}\) pₙ -> exprₙ\\ +\end{tabular} +\end{center} It can be described more formally through a BNF grammar. -\begin{itemize} -\item[{$\square$}] BNF +\begin{verbatim} -\item[{$\square$}] Come funziona il pattern matching? -\end{itemize} +pattern ::= value-name +| _ ;; wildcard pattern +| constant ;; matches a constant value +| pattern as value-name ;; binds to value-name +| ( pattern ) ;; parenthesized pattern +| pattern | pattern ;; or-pattern +| constr pattern ;; variant pattern +| [ pattern { ; pattern } [ ; ] ] ;; list patterns +| pattern :: pattern ;; lists patterns using cons operator (::) +| [| pattern { ; pattern } [ ; ] |] ;; array pattern +| char-literal .. char-literal ;; match on a range of characters +| { field [: typexpr] [= pattern] { ; field [: typexpr] [= pattern] } \ + [; _ ] [ ; ] } ;; patterns that match on records -\item {\bfseries\sffamily TODO} 1.2.1 Pattern matching compilation to lambda code -\label{sec:orgd51f0ff} +\end{verbatim} -\begin{itemize} -\item[{$\square$}] Da tabella a matrice -\end{itemize} -Formally, pattern are defined as follows: +\item 1.2.1 Pattern matching compilation to lambda code +\label{sec:org5e1ded7} + +During compilation, patterns are in the form \begin{center} \begin{tabular}{ll} -pattern & Patterns\\ +pattern & type of pattern\\ \hline \_ & wildcard\\ x & variable\\ @@ -252,14 +266,8 @@ c(p₁,p₂,\ldots{},pₙ) & constructor pattern\\ \end{tabular} \end{center} -Values are defined as follows: -\begin{center} -\begin{tabular}{ll} -values & Values\\ -\hline -c(v₁, v₂, \ldots{}, vₙ) & constructor value\\ -\end{tabular} -\end{center} +Expressions are compiled into lambda code and are referred as lambda +code actions. The entire pattern matching code can be represented as a clause matrix that associates rows of patterns (p\(_{\text{i,1}}\), p\(_{\text{i,2}}\), \ldots{}, p\(_{\text{i,n}}\)) to @@ -290,7 +298,6 @@ following rules apply \begin{center} \begin{tabular}{llll} - & & & \\ \hline \_ & ≼ & v & ∀v\\ x & ≼ & v & ∀v\\ @@ -301,9 +308,9 @@ c(p₁, p₂, \ldots{}, pₐ) & ≼ & c(v₁, v₂, \ldots{}, vₐ) & iff (p₁, \end{tabular} \end{center} -We can also say that \emph{v} is an \emph{instance} of \emph{p}. +When a value \emph{v} matches pattern \emph{p} we say that \emph{v} is an \emph{instance} of \emph{p}. -When we consider the pattern matrix P we say that the value vector +Considering the pattern matrix P we say that the value vector \(\vec{v}\) = (v₁, v₂, \ldots{}, vᵢ) matches the line number i in P if and only if the following two conditions are satisfied: \begin{itemize} @@ -319,40 +326,143 @@ instances of q are instances of p are the same \item Patterns p and q are compatible when they share a common instance \end{itemize} -\end{enumerate} -\subsection{1.2.1.1 Initial state of the compilation} -\label{sec:org333d6ea} +\begin{enumerate} +\item Initial state of the compilation +\label{sec:org886c2ed} Given a source of the following form: -\#+BEGIN\_SRC ocaml -match x with -\begin{center} -\begin{tabular}{l} -p₁ -> e₁\\ -p₂ -> e₂\\ -\end{tabular} -\end{center} -\ldots{} -\begin{center} -\begin{tabular}{l} -pₘ -> eₘ\\ -\end{tabular} -\end{center} -\#+END\_SRC ocaml -the initial input of the algorithm consists of a vector of variables -\(\vec{x}\) = (x₁, x₂, \ldots{}, xₙ) of size n -and a clause matrix P → L of width n and height m. +\begin{center} +\begin{tabular}{l} +match variable with\\ +\(\vert{}\) pattern₁ -> e₁\\ +\(\vert{}\) pattern₂ -> e₂\\ +⋮\\ +\(\vert{}\) pₘ -> eₘ\\ +\end{tabular} +\end{center} + +the initial input of the algorithm C consists of a vector of variables +\(\vec{x}\) = (x₁, x₂, \ldots{}, xₙ) of size \emph{n} where \emph{n} is the arity of +the type of \emph{x} and a clause matrix P → L of width n and height m. +That is: \begin{equation*} -(P → L) = +C((\vec{x} = (x₁, x₂, ..., xₙ), \begin{pmatrix} p_{1,1} & p_{1,2} & \cdots & p_{1,n} → l₁ \\ p_{2,1} & p_{2,2} & \cdots & p_{2,n} → l₂ \\ -\vdots & \vdots & \ddots \vdots → \vdots \\ -p_{m,1} & p_{m,2} & \cdots & p_{m,n} → lₘ +\vdots & \vdots & \ddots & \vdots → \vdots \\ +p_{m,1} & p_{m,2} & \cdots & p_{m,n} → lₘ) \end{pmatrix} \end{equation*} + +The base case C₀ of the algorithm is the case in which the \(\vec{x}\) +is empty, that is \(\vec{x}\) = (), then the result of the compilation +C₀ is l₁ +\begin{equation*} +C₀((), +\begin{pmatrix} +→ l₁ \\ +→ l₂ \\ +→ \vdots \\ +→ lₘ +\end{pmatrix}) +) = l₁ +\end{equation*} + +When \(\vec{x}\) ≠ () then the compilation advances using one of the +following four rules: + +\begin{enumerate} +\item Variable rule: if all patterns of the first column of P are wildcard patterns or +bind the value to a variable, then + +\begin{equation*} +C(\vec{x}, P → L) = C((x₂, x₃, ..., xₙ), P' → L') +\end{equation*} +where +\begin{equation*} +\begin{pmatrix} +p_{1,2} & \cdots & p_{1,n} & → & (let & y₁ & x₁) & l₁ \\ +p_{2,2} & \cdots & p_{2,n} & → & (let & y₂ & x₁) & l₂ \\ +\vdots & \ddots & \vdots & → & \vdots & \vdots & \vdots & \vdots \\ +p_{m,2} & \cdots & p_{m,n} & → & (let & yₘ & x₁) & lₘ +\end{pmatrix} +\end{equation*} + +That means in every lambda action lᵢ there is a binding of x₁ to the +variable that appears on the pattern \$p\(_{\text{i,1}}\). Bindings are omitted +for wildcard patterns and the lambda action lᵢ remains unchanged. + +\item Constructor rule: if all patterns in the first column of P are +constructors patterns of the form k(q₁, q₂, \ldots{}, qₙ) we define a +new matrix, the specialized clause matrix S, by applying the +following transformation on every row \emph{p}: +\begin{lstlisting}[mathescape,columns=fullflexible,basicstyle=\fontfamily{lmvtt}\selectfont,] +for every c ∈ Set of constructors + for i ← 1 .. m + let kᵢ ← constructor_of($p_{i,1}$) + if kᵢ = c then + p ← $q_{i,1}$, $q_{i,2}$, ..., $q_{i,n'}$, $p_{i,2}$, $p_{i,3}$, ..., $p_{i, n}$ +\end{lstlisting} +Patterns of the form \(q_{i,j}\) matches on the values of the +constructor and we define new fresh variables y₁, y₂, \ldots{}, yₐ so +that the lambda action lᵢ becomes +\end{enumerate} + +\begin{lstlisting}[mathescape,columns=fullflexible,basicstyle=\fontfamily{lmvtt}\selectfont,] + (let (y₁ (field 0 x₁)) + (y₂ (field 1 x₁)) + ... + (yₐ (field (a-1) x₁)) + lᵢ) +\end{lstlisting} + +and the result of the compilation for the set of constructors +\{c₁, c₂, \ldots{}, cₖ\} is: + +\begin{lstlisting}[mathescape,columns=fullflexible,basicstyle=\fontfamily{lmvtt}\selectfont,] + switch x₁ with + case c₁: l₁ + case c₂: l₂ + ... + case cₖ: lₖ + default: exit +\end{lstlisting} + +\begin{enumerate} +\item Orpat rule: there are various strategies for dealing with +or-patterns. The most naive one is to split the or-patterns. +For example a row p containing an or-pattern: +\begin{equation*} +(p_{i,1}|q_{i,1}|r_{i,1}), p_{i,2}, ..., p_{i,m} → lᵢ +\end{equation*} +results in three rows added to the clause matrix +\begin{equation*} +p_{i,1}, p_{i,2}, ..., p_{i,m} → lᵢ \\ +\end{equation*} +\begin{equation*} +q_{i,1}, p_{i,2}, ..., p_{i,m} → lᵢ \\ +\end{equation*} +\begin{equation*} +r_{i,1}, p_{i,2}, ..., p_{i,m} → lᵢ +\end{equation*} +\item Mixture rule: +When none of the previous rules apply the clause matrix P → L is +splitted into two clause matrices, the first P₁ → L₁ that is the +largest prefix matrix for which one of the three previous rules +apply, and P₂ → L₂ containing the remaining rows. The algorithm is +applied to both matrices. +\end{enumerate} + +\begin{comment} +#+BEGIN_COMMENT +CITE paper? +#+END_COMMENT’ +\end{comment} +\end{enumerate} +\end{enumerate} \end{document}