diff --git a/tesi/conv.py b/tesi/conv.py index 7a2ab89..b726503 100644 --- a/tesi/conv.py +++ b/tesi/conv.py @@ -35,9 +35,24 @@ def convert(ch, mathmode): else: return symbols[ch] if ch in symbols else ch +def latex_errors_replacements(charlist): + text = ''.join(charlist).split(' ') + replacements = {'\n\end{comment}\n\end{enumerate}\n\end{enumerate}\n\n\subsection{Symbolic': + '\n\end{comment}\n\n\subsection{Symbolic'} + r_set = set(replacements.keys()) + for word in text: + it = r_set.intersection(set([word])) + if it: + yield from replacements[it.pop()] + else: + yield from word + yield ' ' + # convert symbols except the one requiring math mode modifiers firstpass = [convert(*c) for c in read_by_char(argv[1])] +# remove a latex error +secondpass = latex_errors_replacements(firstpass) -newfile = ''.join(firstpass) +newfile = ''.join(secondpass) with open(argv[2], 'w') as f: f.write(newfile) diff --git a/tesi/referenze/JAR15.pdf b/tesi/referenze/JAR15.pdf new file mode 100644 index 0000000..616a4fa Binary files /dev/null and b/tesi/referenze/JAR15.pdf differ diff --git a/tesi/referenze/malfunction.pdf b/tesi/referenze/malfunction.pdf new file mode 100644 index 0000000..14a53a8 Binary files /dev/null and b/tesi/referenze/malfunction.pdf differ diff --git a/tesi/referenze/symb_exec.pdf b/tesi/referenze/symb_exec.pdf new file mode 100644 index 0000000..ec966fd Binary files /dev/null and b/tesi/referenze/symb_exec.pdf differ diff --git a/tesi/referenze/validation-scheduling.pdf b/tesi/referenze/validation-scheduling.pdf new file mode 100644 index 0000000..d3235e1 Binary files /dev/null and b/tesi/referenze/validation-scheduling.pdf differ diff --git a/tesi/tesi.pdf b/tesi/tesi.pdf index 9d07d91..7c086de 100644 Binary files a/tesi/tesi.pdf and b/tesi/tesi.pdf differ diff --git a/tesi/tesi_unicode.org b/tesi/tesi_unicode.org index df058c0..1807b99 100644 --- a/tesi/tesi_unicode.org +++ b/tesi/tesi_unicode.org @@ -1,24 +1,24 @@ \begin{comment} -* TODO Scaletta [1/4] +* TODO Scaletta [1/5] - [X] Abstract - [-] Background [40%] - [X] Ocaml - [ ] Lambda code [0%] - - [ ] Untyped lambda form - - [ ] OCaml specific instructions - - [X] Pattern matching [100%] - - [X] Introduzione - - [X] Compilation to lambda form - - [ ] Translation Verification + - [ ] Compiler optimizations + - [ ] other instructions + - [X] Pattern matching - [ ] Symbolic execution - - [ ] Translation verification of the Pattern Matching Compiler + - [ ] Translation Validation + - [ ] Translation validation of the Pattern Matching Compiler - [ ] Source translation - [ ] Formal Grammar - [ ] Compilation of source patterns + - [ ] Rest? - [ ] Target translation - [ ] Formal Grammar - [ ] Symbolic execution of the lambda target - [ ] Equivalence between source and target + - [ ] Proof of correctness - [ ] Practical results \end{comment} @@ -67,11 +67,11 @@ OCaml shares many features with other dialects of ML, such as SML and Caml Light The main features of ML languages are the use of the Hindley-Milner type system that provides many advantages with respect to static type systems of traditional imperative and object oriented language such as C, C++ and Java, such as: - - Parametric polymorphism: in certain scenarios a function can accept more than one + - Polymorphism: in certain scenarios a function can accept more than one type for the input parameters. For example a function that computes the lenght of a list doesn't need to inspect the type of the elements of the list and for this reason - a List.length function can accept list of integers, list of strings and in general - list of any type. Such languages offer polymorphic functions through subtyping at + a List.length function can accept lists of integers, lists of strings and in general + lists of any type. Such languages offer polymorphic functions through subtyping at runtime only, while other languages such as C++ offer polymorphism through compile time templates and function overloading. With the Hindley-Milner type system each well typed function can have more than one @@ -104,7 +104,77 @@ inheritance, subtyping and dynamic binding, and modules, that provide a way to encapsulate definitions. Modules are checked statically and can be reificated through functors. -*** Pattern matching +** Lambda form compilation +\begin{comment} +https://dev.realworldocaml.org/compiler-backend.html +\end{comment} + +OCaml provides compilation in form of a byecode executable with an +optionally embeddable interpreter and a native executable that could +be statically linked to provide a single file executable. + +After the OCaml typechecker has proven that the program is type safe, +the OCaml compiler lower the code to /Lambda/, an s-expression based +language that assumes that its input has already been proved safe. +On the /Lambda/ representation of the source program, the compiler +performes a series of optimization passes before translating the +lambda form to assembly code. + +*** OCaml datatypes + +Most native data types in OCaml, such as integers, tuples, lists, +records, can be seen as instances of the following definition + +#+BEGIN_SRC ocaml +type t = Nil | One of int | Cons of int * t +#+END_SRC +that is a type /t/ with three constructors that define its complete +signature. +Every constructor has an arity. Nil, a constructor of arity 0, is +called a constant constructor. + +*** Lambda form types +A lambda form target file produced by the ocaml compiler consists of a +single s-expression. Every s-expression consist of /(/, a sequence of +elements separated by a whitespace and a closing /)/. +Elements of s-expressions are: +- Atoms: sequences of ascii letters, digits or symbols +- Variables +- Strings: enclosed in double quotes and possibly escaped +- S-expressions: allowing arbitrary nesting + +There are several numeric types: +- integers: that us either 31 or 63 bit two's complement arithmetic + depending on system word size, and also wrapping on overflow +- 32 bit and 64 bit integers: that use 32-bit and 64-bit two's complement arithmetic + with wrap on overflow +- big integers: offer integers with arbitrary precision +- floats: that use IEEE754 double-precision (64-bit) arithmetic with + the addition of the literals /infinity/, /neg_infinity/ and /nan/. + +The are varios numeric operations defined: + +- Arithmetic operations: +, -, *, /, % (modulo), neg (unary negation) +- Bitwise operations: &, |, ^, <<, >> (zero-shifting), a>> (sign extending) +- Numeric comparisons: <, >, <=, >=, == + +*** Functions + +Functions are defined using the following syntax, and close over all +bindings in scope: (lambda (arg1 arg2 arg3) BODY) +and are applied using the following syntax: (apply FUNC ARG ARG ARG) +Evaluation is eager. + +*** Bindings +The atom /let/ introduces a sequence of bindings: +(let BINDING BINDING BINDING ... BODY) + +*** Other atoms +TODO: if, switch, stringswitch... +TODO: magari esempi + + +** Pattern matching Pattern matching is a widely adopted mechanism to interact with ADT. C family languages provide branching on predicates through the use of @@ -205,7 +275,7 @@ pattern ::= value-name #+END_SRC - +\begin{comment} *** 1.2.1 Pattern matching compilation to lambda code During compilation, patterns are in the form @@ -232,17 +302,6 @@ p_{m,1} & p_{m,2} & \cdots & p_{m,n} & → lₘ \end{pmatrix} \end{equation*} -Most native data types in OCaml, such as integers, tuples, lists, -records, can be seen as instances of the following definition - -#+BEGIN_SRC ocaml -type t = Nil | One of int | Cons of int * t -#+END_SRC -that is a type /t/ with three constructors that define its complete -signature. -Every constructor has an arity. Nil, a constructor of arity 0, is -called a constant constructor. - The pattern /p/ matches a value /v/, written as p ≼ v, when one of the following rules apply @@ -390,8 +449,34 @@ following four rules: apply, and P₂ → L₂ containing the remaining rows. The algorithm is applied to both matrices. +\end{comment} + \begin{comment} #+BEGIN_COMMENT CITE paper? #+END_COMMENT’ \end{comment} + +** Symbolic execution + +** Translation validation +Translators, such as translators and code generators, are huge pieces of +software usually consisting of multiple subsystem and +constructing an actual specification of a translator implementation for +formal validation is a very long task. Moreover, different +translators implement different algorithms, so the correctness proof of +a translator cannot be generalized and reused to prove another translator. +Translation validation is an alternative to the verification of +existing translators that consists of taking the source and the target +(compiled) program and proving /a posteriori/ their semantic equivalence. + +- [ ] Techniques for translation validation +- [ ] What does semantically equivalent mean +- [ ] What happens when there is no semantic equivalence +- [ ] Translation validation through symbolic execution + +** Translation validation of the Pattern Matching Compiler + - [ ] Source translation + - [ ] Formal Grammar + - [ ] Compilation of source patterns + - [ ] Rest? diff --git a/tesi/tesi_unicode.tex b/tesi/tesi_unicode.tex index 050fe9e..e9319d1 100644 --- a/tesi/tesi_unicode.tex +++ b/tesi/tesi_unicode.tex @@ -1,4 +1,4 @@ -% Created 2020-02-24 Mon 19:44 +% Created 2020-03-02 Mon 14:31 % Intended LaTeX compiler: pdflatex \documentclass[11pt]{article} \usepackage[utf8]{inputenc} @@ -36,8 +36,8 @@ \maketitle \begin{comment} -\section{{\bfseries\sffamily TODO} Scaletta [1/4]} -\label{sec:orgdce55fc} +\section{{\bfseries\sffamily TODO} Scaletta [1/5]} +\label{sec:org62901c2} \begin{itemize} \item[{$\boxtimes$}] Abstract \item[{$\boxminus$}] Background [40\%] @@ -45,18 +45,14 @@ \item[{$\boxtimes$}] Ocaml \item[{$\square$}] Lambda code [0\%] \begin{itemize} -\item[{$\square$}] Untyped lambda form -\item[{$\square$}] OCaml specific instructions +\item[{$\square$}] Compiler optimizations +\item[{$\square$}] other instructions \end{itemize} -\item[{$\boxtimes$}] Pattern matching [100\%] -\begin{itemize} -\item[{$\boxtimes$}] Introduzione -\item[{$\boxtimes$}] Compilation to lambda form -\end{itemize} -\item[{$\square$}] Translation Verification +\item[{$\boxtimes$}] Pattern matching \item[{$\square$}] Symbolic execution +\item[{$\square$}] Translation Validation \end{itemize} -\item[{$\square$}] Translation verification of the Pattern Matching Compiler +\item[{$\square$}] Translation validation of the Pattern Matching Compiler \begin{itemize} \item[{$\square$}] Source translation \begin{itemize} @@ -70,6 +66,7 @@ \end{itemize} \item[{$\square$}] Equivalence between source and target \end{itemize} +\item[{$\square$}] Proof of correctness \item[{$\square$}] Practical results \end{itemize} @@ -92,10 +89,10 @@ blackboxes that are not evaluated in the context of the verification. \end{abstract} \section{Background} -\label{sec:orgf336654} +\label{sec:orgbf4de70} \subsection{OCaml} -\label{sec:org885a2ca} +\label{sec:orga9a97c9} Objective Caml (OCaml) is a dialect of the ML (Meta-Language) family of programming languages. OCaml shares many features with other dialects of ML, such as SML and Caml Light, @@ -103,11 +100,11 @@ The main features of ML languages are the use of the Hindley-Milner type system provides many advantages with respect to static type systems of traditional imperative and object oriented language such as C, C++ and Java, such as: \begin{itemize} -\item Parametric polymorphism: in certain scenarios a function can accept more than one +\item Polymorphism: in certain scenarios a function can accept more than one type for the input parameters. For example a function that computes the lenght of a list doesn't need to inspect the type of the elements of the list and for this reason -a List.length function can accept list of integers, list of strings and in general -list of any type. Such languages offer polymorphic functions through subtyping at +a List.length function can accept lists of integers, lists of strings and in general +lists of any type. Such languages offer polymorphic functions through subtyping at runtime only, while other languages such as C++ offer polymorphism through compile time templates and function overloading. With the Hindley-Milner type system each well typed function can have more than one @@ -141,9 +138,92 @@ inheritance, subtyping and dynamic binding, and modules, that provide a way to encapsulate definitions. Modules are checked statically and can be reificated through functors. +\subsection{Lambda form compilation} +\label{sec:org2d10d35} +\begin{comment} +https://dev.realworldocaml.org/compiler-backend.html +\end{comment} + +OCaml provides compilation in form of a byecode executable with an +optionally embeddable interpreter and a native executable that could +be statically linked to provide a single file executable. + +After the OCaml typechecker has proven that the program is type safe, +the OCaml compiler lower the code to \emph{Lambda}, an s-expression based +language that assumes that its input has already been proved safe. +On the \emph{Lambda} representation of the source program, the compiler +performes a series of optimization passes before translating the +lambda form to assembly code. + \begin{enumerate} -\item Pattern matching -\label{sec:org879bb72} +\item OCaml datatypes +\label{sec:orgd605d09} + +Most native data types in OCaml, such as integers, tuples, lists, +records, can be seen as instances of the following definition + +\begin{verbatim} +type t = Nil | One of int | Cons of int * t +\end{verbatim} +that is a type \emph{t} with three constructors that define its complete +signature. +Every constructor has an arity. Nil, a constructor of arity 0, is +called a constant constructor. + +\item Lambda form types +\label{sec:org1ee20a6} +A lambda form target file produced by the ocaml compiler consists of a +single s-expression. Every s-expression consist of \emph{(}, a sequence of +elements separated by a whitespace and a closing \emph{)}. +Elements of s-expressions are: +\begin{itemize} +\item Atoms: sequences of ascii letters, digits or symbols +\item Variables +\item Strings: enclosed in double quotes and possibly escaped +\item S-expressions: allowing arbitrary nesting +\end{itemize} + +There are several numeric types: +\begin{itemize} +\item integers: that us either 31 or 63 bit two's complement arithmetic +depending on system word size, and also wrapping on overflow +\item 32 bit and 64 bit integers: that use 32-bit and 64-bit two's complement arithmetic +with wrap on overflow +\item big integers: offer integers with arbitrary precision +\item floats: that use IEEE754 double-precision (64-bit) arithmetic with +the addition of the literals \emph{infinity}, \emph{neg\_infinity} and \emph{nan}. +\end{itemize} + +The are varios numeric operations defined: + +\begin{itemize} +\item Arithmetic operations: +, -, *, /, \% (modulo), neg (unary negation) +\item Bitwise operations: \&, |, \^{}, <<, >> (zero-shifting), a>> (sign extending) +\item Numeric comparisons: <, >, <=, >=, == +\end{itemize} + +\item Functions +\label{sec:org914d5eb} + +Functions are defined using the following syntax, and close over all +bindings in scope: (lambda (arg1 arg2 arg3) BODY) +and are applied using the following syntax: (apply FUNC ARG ARG ARG) +Evaluation is eager. + +\item Bindings +\label{sec:org055206b} +The atom \emph{let} introduces a sequence of bindings: +(let BINDING BINDING BINDING \ldots{} BODY) + +\item Other atoms +\label{sec:org0f92182} +TODO: if, switch, stringswitch\ldots{} +TODO: magari esempi +\end{enumerate} + + +\subsection{Pattern matching} +\label{sec:org9876fb9} Pattern matching is a widely adopted mechanism to interact with ADT. C family languages provide branching on predicates through the use of @@ -250,9 +330,10 @@ pattern ::= value-name \end{verbatim} - +\begin{comment} +\begin{enumerate} \item 1.2.1 Pattern matching compilation to lambda code -\label{sec:org5e1ded7} +\label{sec:org4d27bd9} During compilation, patterns are in the form \begin{center} @@ -282,17 +363,6 @@ p_{m,1} & p_{m,2} & \cdots & p_{m,n} & → lₘ \end{pmatrix} \end{equation*} -Most native data types in OCaml, such as integers, tuples, lists, -records, can be seen as instances of the following definition - -\begin{verbatim} -type t = Nil | One of int | Cons of int * t -\end{verbatim} -that is a type \emph{t} with three constructors that define its complete -signature. -Every constructor has an arity. Nil, a constructor of arity 0, is -called a constant constructor. - The pattern \emph{p} matches a value \emph{v}, written as p ≼ v, when one of the following rules apply @@ -329,7 +399,7 @@ are the same \begin{enumerate} \item Initial state of the compilation -\label{sec:org886c2ed} +\label{sec:orge9c6bc4} Given a source of the following form: @@ -458,6 +528,8 @@ apply, and P₂ → L₂ containing the remaining rows. The algorithm is applied to both matrices. \end{enumerate} +\end{comment} + \begin{comment} #+BEGIN_COMMENT CITE paper? @@ -465,4 +537,26 @@ CITE paper? \end{comment} \end{enumerate} \end{enumerate} + +\subsection{Symbolic execution} +\label{sec:orgdb60b84} + +\subsection{Translation validation} +\label{sec:org096d047} +Translators, such as translators and code generators, are huge pieces of +software usually consisting of multiple subsystem and +constructing an actual specification of a translator implementation for +formal validation is a very long task. Moreover, different +translators implement different algorithms, so the correctness proof of +a translator cannot be generalized and reused to prove another translator. +Translation validation is an alternative to the verification of +existing translators that consists of taking the source and the target +(compiled) program and proving \emph{a posteriori} their semantic equivalence. + +\begin{itemize} +\item[{$\square$}] Techniques for translation validation +\item[{$\square$}] What does semantically equivalent mean +\item[{$\square$}] What happens when there is no semantic equivalence +\item[{$\square$}] Translation validation through symbolic execution +\end{itemize} \end{document}