tesi coppo

2020-06-30 23:21:29 +02:00 · 2020-06-30 23:21:29 +02:00 · 4a35831f00
commit 4a35831f00
parent 0ba93f9046
14 changed files with 167 additions and 116 deletions
--- a/anno3/apprendimento_automatico/esercizi/marco/classification_iris_knn_aa_19_20-checkpoint.ipynb
+++ b/anno3/apprendimento_automatico/esercizi/marco/classification_iris_knn_aa_19_20-checkpoint.ipynb
--- a/anno3/apprendimento_automatico/esercizi/marco/coverage_plots.ipynb
+++ b/anno3/apprendimento_automatico/esercizi/marco/coverage_plots.ipynb
@ -483,7 +483,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.7.5"
+   "version": "3.7.7"
  }
 },
 "nbformat": 4,
--- a/anno3/apprendimento_automatico/esercizi/marco/my_iris_predictions
+++ b/anno3/apprendimento_automatico/esercizi/marco/my_iris_predictions
@ -1,24 +1,24 @@
-digraph Tree {
-node [shape=box] ;
-0 [label="X[2] <= 2.45\nentropy = 1.585\nsamples = 150\nvalue = [50, 50, 50]"] ;
-1 [label="entropy = 0.0\nsamples = 50\nvalue = [50, 0, 0]"] ;
-0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ;
-2 [label="X[3] <= 1.75\nentropy = 1.0\nsamples = 100\nvalue = [0, 50, 50]"] ;
-0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"] ;
-3 [label="X[2] <= 4.95\nentropy = 0.445\nsamples = 54\nvalue = [0, 49, 5]"] ;
-2 -> 3 ;
-4 [label="X[0] <= 5.15\nentropy = 0.146\nsamples = 48\nvalue = [0, 47, 1]"] ;
-3 -> 4 ;
-5 [label="entropy = 0.722\nsamples = 5\nvalue = [0, 4, 1]"] ;
-4 -> 5 ;
-6 [label="entropy = 0.0\nsamples = 43\nvalue = [0, 43, 0]"] ;
-4 -> 6 ;
-7 [label="entropy = 0.918\nsamples = 6\nvalue = [0, 2, 4]"] ;
-3 -> 7 ;
-8 [label="X[2] <= 4.95\nentropy = 0.151\nsamples = 46\nvalue = [0, 1, 45]"] ;
-2 -> 8 ;
-9 [label="entropy = 0.65\nsamples = 6\nvalue = [0, 1, 5]"] ;
-8 -> 9 ;
-10 [label="entropy = 0.0\nsamples = 40\nvalue = [0, 0, 40]"] ;
-8 -> 10 ;
-}
+digraph Tree {
+node [shape=box] ;
+0 [label="X[2] <= 2.45\nentropy = 1.585\nsamples = 150\nvalue = [50, 50, 50]"] ;
+1 [label="entropy = 0.0\nsamples = 50\nvalue = [50, 0, 0]"] ;
+0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ;
+2 [label="X[3] <= 1.75\nentropy = 1.0\nsamples = 100\nvalue = [0, 50, 50]"] ;
+0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"] ;
+3 [label="X[2] <= 4.95\nentropy = 0.445\nsamples = 54\nvalue = [0, 49, 5]"] ;
+2 -> 3 ;
+4 [label="X[0] <= 5.15\nentropy = 0.146\nsamples = 48\nvalue = [0, 47, 1]"] ;
+3 -> 4 ;
+5 [label="entropy = 0.722\nsamples = 5\nvalue = [0, 4, 1]"] ;
+4 -> 5 ;
+6 [label="entropy = 0.0\nsamples = 43\nvalue = [0, 43, 0]"] ;
+4 -> 6 ;
+7 [label="entropy = 0.918\nsamples = 6\nvalue = [0, 2, 4]"] ;
+3 -> 7 ;
+8 [label="X[2] <= 4.95\nentropy = 0.151\nsamples = 46\nvalue = [0, 1, 45]"] ;
+2 -> 8 ;
+9 [label="entropy = 0.65\nsamples = 6\nvalue = [0, 1, 5]"] ;
+8 -> 9 ;
+10 [label="entropy = 0.0\nsamples = 40\nvalue = [0, 0, 40]"] ;
+8 -> 10 ;
+}
--- a/tesi/files/table.png
+++ b/tesi/files/table.png
--- a/tesi/ltximg/org-ltximg_03dd30c0c071d98d7cc1c5d906f4fcf8b1428937.png
+++ b/tesi/ltximg/org-ltximg_03dd30c0c071d98d7cc1c5d906f4fcf8b1428937.png
--- a/tesi/ltximg/org-ltximg_270bf2a704603067e4ae1970d90fdd67c67a7c54.png
+++ b/tesi/ltximg/org-ltximg_270bf2a704603067e4ae1970d90fdd67c67a7c54.png
--- a/tesi/ltximg/org-ltximg_32d9bce1a90372b18cdecc56bbda8c883076e2b3.png
+++ b/tesi/ltximg/org-ltximg_32d9bce1a90372b18cdecc56bbda8c883076e2b3.png
--- a/tesi/ltximg/org-ltximg_38fb63e0cd1a10344ae59e94735fb511d197bd6a.png
+++ b/tesi/ltximg/org-ltximg_38fb63e0cd1a10344ae59e94735fb511d197bd6a.png
--- a/tesi/ltximg/org-ltximg_b405f26b28de7c623493ee7f39493b2adc6f035f.png
+++ b/tesi/ltximg/org-ltximg_b405f26b28de7c623493ee7f39493b2adc6f035f.png
--- a/tesi/ltximg/org-ltximg_deebd764e81e5cc3bb1fc3d997280ffbea45fdf4.png
+++ b/tesi/ltximg/org-ltximg_deebd764e81e5cc3bb1fc3d997280ffbea45fdf4.png
--- a/tesi/ltximg/org-ltximg_e4271b786c7180a94a3f11cd0175cf7dbf929f67.png
+++ b/tesi/ltximg/org-ltximg_e4271b786c7180a94a3f11cd0175cf7dbf929f67.png
--- a/tesi/tesi.pdf
+++ b/tesi/tesi.pdf
--- a/tesi/tesi_unicode.org
+++ b/tesi/tesi_unicode.org
@ -130,7 +130,9 @@ compiler avoid the introduction of new bugs and that such
 modifications don't result in a different behavior than the current one. 
 \\
 One possible approach is to formally verify the pattern matching compiler
-implementation using a machine checked proof.
+implementation using a machine checked proof. Much effort has been
+spent on this topic and whole compilers have been proven either
+manually\cite{compuno,compdue,comptre} or using a proof assistant\cite{compquattro,compfive,compsei}.
 Another possibility, albeit with a weaker result, is to verify that
 each source program and target program pair are semantically correct.
 We chose the latter technique, translation validation because is easier to adopt in
@ -238,15 +240,6 @@ expressions that are lowered by the compiler into lambda expressions.
 possible immediate-integer or block-tag values.
 \end{comment}
 \begin{mathpar}
-  \begin{array}{l@{~}r@{~}r@{~}l}
-    \text{\emph{decision trees}} & D(\pi, e)
-    & \bnfeq & \Leaf {\cle(a)} \\
-    & & \bnfor & \Failure \\
-    & & \bnfor & \Switch a {\Fam {i \in I} {\pi_i, D_i}} \Dfb \\
-    & & \bnfor & \Guard {\cle(a)} {D_0} {D_1} \\
-    & & \bnfor & Unreachable 
-  \end{array}
-  \quad
  \begin{array}{l@{~}r@{~}r@{~}l}
    \text{\emph{environment}} & \sigma(v)
    & \bnfeq & [x_1 \mapsto v_1, \dots, v_n \mapsto v_n] \\
@ -265,76 +258,28 @@ expressions that are lowered by the compiler into lambda expressions.
  a(v_S), a(v_T), D_S(v_S), D_T(v_T) \quad (\text{\emph{omitted})}
  \end{array}
 \end{mathpar}
+\begin{mathpar}
+  \begin{array}{l@{~}r@{~}r@{~}l}
+    \text{\emph{decision trees}} & D(\pi, e)
+    & \bnfeq & \Leaf {\cle(a)} \\
+    & & \bnfor & \Failure \\
+    & & \bnfor & \Switch a {\Fam {i \in I} {\pi_i, D_i}} \Dfb \\
+    & & \bnfor & \Guard {\cle(a)} {D_0} {D_1} \\
+    & & \bnfor & Unreachable 
+  \end{array}
+\end{mathpar}
 The tree $\Leaf \cle$ returns a leaf expression $e$ in a captured
 environment $\sigma$ mapping variables to accessors.\\

 $\Failure$ expresses a match failure that occurs when no clause matches the input
 value.
 \\
-$\Switch a {\Fam {i \in I} {\pi_i, D_i}} D_{fallback}$ has one subtree $D_i$
+$\Switch a {\Fam {i \in I} {\pi_i, D_i}} {D_{fallback}}$ has one subtree $D_i$
 for every head constructor that appears in the pattern matching
 clauses, and a fallback case that is used when at least one variant of
 the constructor doesn't appear in the clauses. The presence of the
 fallback case does not imply non-exhaustive match clauses.
-\begin{minipage}{0.3\linewidth}
-  \scriptsize
-\begin{lstlisting}
-let f1 = function
-| true -> 1
-| false -> 0
-\end{lstlisting}
-\end{minipage}
-\hfill\vline\hfill
-\begin{minipage}{0.3\linewidth}
-  \scriptsize
-\begin{lstlisting}
-let f1 = function
-| true -> 1
-| _ -> 0
-\end{lstlisting}
-\end{minipage}
-\hfill\vline\hfill
-\begin{minipage}{0.3\linewidth}
-  \scriptsize
-\begin{lstlisting}
-let f1 = function
-| true -> 1
-\end{lstlisting}
-\end{minipage}
-
-\begin{minipage}{0.3\linewidth}
-  \scriptsize
-\begin{verbatim}
-    Switch(Root)
-    /          \
-(Bool true)  (Bool false)
-  /              \
-Leaf(Int 1)  Leaf(Int 0)
-\end{verbatim}
-\end{minipage}
-\hfill\vline\hfill
-\begin{minipage}{0.3\linewidth}
-  \scriptsize
-\begin{verbatim}
-    Switch(Root)
-    /          \
-(Bool true)    (`Fallback`)
-  /              \
-Leaf(Int 1)   Leaf(Int 0)
-\end{verbatim}
-\end{minipage}
-\hfill\vline\hfill
-\begin{minipage}{0.3\linewidth}
-  \scriptsize
-\begin{verbatim}
-    Switch(Root)
-    /          \
-  (Bool true)  (`Fallback`)
-  /              \
-Leaf(Int 1)    Failure
-\end{verbatim}
-\end{minipage}
-\hfill \\
+[[./files/table.png]]
 As we can see from these simple examples in which we pattern match on a
 boolean constructor the fallback node in the second case implicitly
 covers the path in which the value is equal to false while in the
@ -2169,12 +2114,78 @@ In this example the tool correctly handles /Failure/ nodes on both decision tree
 ;; include_file example9bis.lambda
 ;; include_file example9bis.trace

+* Conclusions
+In recent years bugs in the OCaml pattern matching compiler have
+been infrequent: only one in the last four years\cite{bug}.
+The main motivation for this work is the ongoing efforts of the OCaml
+core developers towards towards refactoring the
+OCaml pattern matching
+compiler\cite{ref1,ref2,ref3,ref4,ref5,ref6,ref7,ref8,ref9,ref10,ref11,ref12,ref13}.
+The prototype presented in this dissertation shows a
+sound approach for the verification of a correct compilation.
+Compared to similar works in the field, this work differs from Necula
+et al.\cite{trans_due} for the absence of false positives cases.
+The work of Kirchner et al.\cite{trans_55} is the most similar to this
+dissertation but has the drawbacks of introducing a new /IL/, requires
+a full fledged SAT solver (Zenon) and does not handle guards.
+The internship was focused on providing a working MVP. In order for
+the work to be integrated into the OCaml compiler toolchain the
+prototype needs to handle all primitive datatypes.
+While string and floats are trivial to add to the current
+implementation, OCaml supports extensible sum types\cite{exn} that may
+require a lot of effort to be successfully integrated into the tool.
+Extensible sum types are a product type whose set
+of values can be extended by declaring new constructors at runtime.
+Pattern matching on extensible sum types requires a wildcard pattern
+to handle unknown variant constructors. Exceptions were the first type
+of extensible sum type implemented in the language.
+The prototype uses /observe/ and /guard/ directives as a shortcut to
+avoid depending on the compiler. In order to support real-world code
+we need to instrument the compiler-libs to provide a two pass
+compilation where guards and leaf expressions can be compiled to
+Lambda code indipendently from the rest of the pattern match code.
+
 \begin{thebibliography}{9}
 \bibitem{cpp_pat} 
 Sergei Murzin, Michael Park, David Sankel, Dan Sarginson. 
 \textit{P1371R1: Pattern Matching Proposal for C++}. 
 http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1371r1.pdf

+
+\bibitem{compuno}
+John McCarthy and James Painter.
+\textit{Correctness of a compiler for arithmetic expressions}.
+Proceedings Symposium in Applied Mathematics, Vol. 19, pages 33–41. AMS, 1967.
+
+\bibitem{compdue}
+F. Lockwood Morris.
+\textit{Advice on structuring compilers and proving them correct}.
+1st annual ACM SIGACT-SIGPLAN Symposium on
+Principles of Programming Languages, pages 144–152.
+ACM Press, 1973
+
+\bibitem{comptre}
+David Lacey, Neil D. Jones, Eric Van Wyk, and
+Carl Christian Frederiksen.
+\textit{Proving correctness of compiler optimizations by temporal logic}.
+29th ACM Symposium on Principles of Programming
+Languages, pages 283–294. ACM Press, 2002.
+
+\bibitem{compquattro}
+Leroy Xavier. 
+\textit{Formal verification of a realistic compiler}.
+Communications of the ACM 52.7 (2009): 107-115.
+
+\bibitem{compfive}
+Berghofer, Stefan, and Martin Strecker. 
+\textit{Extracting a formally verified, fully executable compiler from a proof assistant}.
+Electronic Notes in Theoretical Computer Science 82.2 (2004): 377-394.
+
+\bibitem{compsei}
+Lochbihler, Andreas.
+\textit{Verifying a compiler for Java threads}.
+European Symposium on Programming. Springer, Berlin, Heidelberg, 2010.
+
 \bibitem{swift}
 Nayebi, Fatih.
 \textit{Swift Functional Programming}.
@ -2258,4 +2269,44 @@ Régis-Gianas, François Pottier Yann.
 \bibitem{internship}
 https://github.com/FraMecca/inria-internship

+\bibitem{bug}
+https://github.com/ocaml/ocaml/issues/7390
+
+\bibitem{ref1}
+https://github.com/ocaml/ocaml/issues/7390
+\bibitem{ref2}
+https://github.com/ocaml/ocaml/pull/8768
+\bibitem{ref3}
+https://github.com/ocaml/ocaml/pull/8850
+\bibitem{ref4}
+https://github.com/ocaml/ocaml/pull/9447
+\bibitem{ref5}
+https://github.com/ocaml/ocaml/pull/9464
+\bibitem{ref6}
+https://github.com/ocaml/ocaml/pull/9493
+\bibitem{ref7}
+https://github.com/ocaml/ocaml/pull/9646
+\bibitem{ref8}
+https://github.com/ocaml/ocaml/pull/9608
+\bibitem{ref9}
+https://github.com/ocaml/ocaml/pull/9599
+\bibitem{ref10}
+https://github.com/ocaml/ocaml/pull/9563
+\bibitem{ref11}
+https://github.com/ocaml/ocaml/pull/9520
+\bibitem{ref12}
+https://github.com/ocaml/ocaml/pull/9511
+\bibitem{ref13}
+https://github.com/ocaml/ocaml/pull/9647
+
+\bibitem{trans_55}
+Kirchner, Claude, Pierre-Etienne Moreau, and Antoine Reilles.
+\textit{Formal validation of pattern matching code}.
+Proceedings of the 7th ACM SIGPLAN international conference on Principles and practice of declarative programming. 2005.
+
+\bibitem{exn}
+OCaml core developers.
+\textit{OCaml Manual}
+https://caml.inria.fr/pub/docs/manual-ocaml/extensiblevariants.html
+
 \end{thebibliography}
--- a/todo.org
+++ b/todo.org
@ -54,11 +54,11 @@
 - [X] segreteria: tesi inglese
 - [X] Gatti: inglese
 - [X] Gatti: Coppo mio relatore
- [-] correzioni Coppo [0/5]
-  - [ ] esempi di esecuzione + test 
+- [-] correzioni Coppo [2/5]
+  - [X] esempi di esecuzione + test 
  - [ ] esplicitare mio contributo a fine introduzione o abstract
  - [ ] spiegare meglio ruolo symbolic exec
-  - [ ] Cosa manca per implementare al compilatore: passare da prototipo a cosa finita
+  - [X] Cosa manca per implementare al compilatore: passare da prototipo a cosa finita
  - [-] Introduzione [2/4]
    - [ ] dovrebbe essere un po' ampliata e migliorata verso la fine
    - [X] snellendo un po' la presentazione formale. 
@ -80,6 +80,3 @@
 - [X] Gabriel: quello che penso sulle equivalenze omesse e` giusto?
 - [X] Cambia le C di constraint tree in D
 - [ ] TODO on the org file
-HALP
-HALP!
-