diff --git a/tesi/.gitignore b/tesi/.gitignore new file mode 100644 index 0000000..0df34f0 --- /dev/null +++ b/tesi/.gitignore @@ -0,0 +1,278 @@ +tesi.tex + +## Core latex/pdflatex auxiliary files: +*.aux +*.lof +*.log +*.lot +*.fls +*.out +*.toc +*.fmt +*.fot +*.cb +*.cb2 +.*.lb + +## Intermediate documents: +*.dvi +*.xdv +*-converted-to.* +# these rules might exclude image files for figures etc. +# *.ps +# *.eps +# *.pdf + +## Generated if empty string is given at "Please type another file name for output:" +.pdf + +## Bibliography auxiliary files (bibtex/biblatex/biber): +*.bbl +*.bcf +*.blg +*-blx.aux +*-blx.bib +*.run.xml + +## Build tool auxiliary files: +*.fdb_latexmk +*.synctex +*.synctex(busy) +*.synctex.gz +*.synctex.gz(busy) +*.pdfsync + +## Build tool directories for auxiliary files +# latexrun +latex.out/ + +## Auxiliary and intermediate files from other packages: +# algorithms +*.alg +*.loa + +# achemso +acs-*.bib + +# amsthm +*.thm + +# beamer +*.nav +*.pre +*.snm +*.vrb + +# changes +*.soc + +# comment +*.cut + +# cprotect +*.cpt + +# elsarticle (documentclass of Elsevier journals) +*.spl + +# endnotes +*.ent + +# fixme +*.lox + +# feynmf/feynmp +*.mf +*.mp +*.t[1-9] +*.t[1-9][0-9] +*.tfm + +#(r)(e)ledmac/(r)(e)ledpar +*.end +*.?end +*.[1-9] +*.[1-9][0-9] +*.[1-9][0-9][0-9] +*.[1-9]R +*.[1-9][0-9]R +*.[1-9][0-9][0-9]R +*.eledsec[1-9] +*.eledsec[1-9]R +*.eledsec[1-9][0-9] +*.eledsec[1-9][0-9]R +*.eledsec[1-9][0-9][0-9] +*.eledsec[1-9][0-9][0-9]R + +# glossaries +*.acn +*.acr +*.glg +*.glo +*.gls +*.glsdefs +*.lzo +*.lzs + +# uncomment this for glossaries-extra (will ignore makeindex's style files!) +# *.ist + +# gnuplottex +*-gnuplottex-* + +# gregoriotex +*.gaux +*.gtex + +# htlatex +*.4ct +*.4tc +*.idv +*.lg +*.trc +*.xref + +# hyperref +*.brf + +# knitr +*-concordance.tex +# TODO Comment the next line if you want to keep your tikz graphics files +*.tikz +*-tikzDictionary + +# listings +*.lol + +# luatexja-ruby +*.ltjruby + +# makeidx +*.idx +*.ilg +*.ind + +# minitoc +*.maf +*.mlf +*.mlt +*.mtc[0-9]* +*.slf[0-9]* +*.slt[0-9]* +*.stc[0-9]* + +# minted +_minted* +*.pyg + +# morewrites +*.mw + +# nomencl +*.nlg +*.nlo +*.nls + +# pax +*.pax + +# pdfpcnotes +*.pdfpc + +# sagetex +*.sagetex.sage +*.sagetex.py +*.sagetex.scmd + +# scrwfile +*.wrt + +# sympy +*.sout +*.sympy +sympy-plots-for-*.tex/ + +# pdfcomment +*.upa +*.upb + +# pythontex +*.pytxcode +pythontex-files-*/ + +# tcolorbox +*.listing + +# thmtools +*.loe + +# TikZ & PGF +*.dpth +*.md5 +*.auxlock + +# todonotes +*.tdo + +# vhistory +*.hst +*.ver + +# easy-todo +*.lod + +# xcolor +*.xcp + +# xmpincl +*.xmpi + +# xindy +*.xdy + +# xypic precompiled matrices and outlines +*.xyc +*.xyd + +# endfloat +*.ttt +*.fff + +# Latexian +TSWLatexianTemp* + +## Editors: +# WinEdt +*.bak +*.sav + +# Texpad +.texpadtmp + +# LyX +*.lyx~ + +# Kile +*.backup + +# gummi +.*.swp + +# KBibTeX +*~[0-9]* + +# TeXnicCenter +*.tps + +# auto folder when using emacs and auctex +./auto/* +*.el + +# expex forward references with \gathertags +*-tags.tex + +# standalone packages +*.sta + +# Makeindex log files +*.lpz diff --git a/tesi/Makefile b/tesi/Makefile index ed6602b..930f6e1 100644 --- a/tesi/Makefile +++ b/tesi/Makefile @@ -1,6 +1,6 @@ SRC = tesi.tex AUX = tesi.aux -DEL = tesi.aux tesi.bbl tesi.blg tesi.log tesi.out +DEL = tesi.aux tesi.bbl tesi.blg tesi.log tesi.out tesi_unicode.tex tesi.pdf tesi.tex texput.log NAME = tesi tesi: @@ -9,9 +9,14 @@ tesi: rm -f $(DEL) @echo @echo "=== Building from scratch ===" + emacs tesi_unicode.org -f org-latex-export-to-latex --kill + python3 conv.py tesi_unicode.tex tesi.tex pdflatex $(SRC) bibtex $(AUX) pdflatex $(SRC) pdflatex $(SRC) @echo @echo "=== All done. Generated $(NAME).pdf ===" + +clean: + rm -f $(DEL) diff --git a/tesi/conv.py b/tesi/conv.py index 0fcc022..fe70105 100644 --- a/tesi/conv.py +++ b/tesi/conv.py @@ -1,21 +1,45 @@ import json +import re from sys import argv allsymbols = json.load(open('./unicode-latex.json')) -symbols = ['≼', '→', '⊀', '⋠', '≺', '∀', '∈', '₂', '₁', 'ₐ', 'ₘ', 'ₙ', 'ᵢ' ] -symbols = {s: allsymbols[s] for s in symbols} -print(symbols) +mysymbols = ['≡', '≼', '→', '⊀', '⋠', '≺', '∀', '∈', '₂', '₁', 'ₐ', 'ₘ', 'ₙ', 'ᵢ', 'ⁱ'] +symbols = {s: allsymbols[s] for s in mysymbols} +mathsymbols = {s: '$'+allsymbols[s]+'$' for s in symbols} def read_by_char(fname): + # Yield character and True/False if inside mathmode block + mathmode = False + mathmode_begin = set(['\\begin{equation*}', '\\begin{equation}']) + mathmode_end = set(['\\end{equation*}', '\\end{equation}']) + cnt = 0 with open(fname, 'r') as fp: for line in fp.readlines(): + cnt += 1 + words = [w.strip() for w in line.split(' ')] + if mathmode_begin.intersection(words): + assert mathmode == False + mathmode = True + elif mathmode_end.intersection(words): + assert mathmode == True, f'Line: {words}, number: {cnt}' + mathmode = False + for ch in line: - yield ch + yield ch, mathmode -def convert(ch): - return symbols[ch] if ch in symbols else ch +def convert(ch, mathmode): + if not mathmode: + return mathsymbols[ch] if ch in mathsymbols else ch + else: + return symbols[ch] if ch in symbols else ch -newfile = [convert(ch) for ch in read_by_char(argv[1])] +# convert symbols except the one requiring math mode modifiers +# all passes produces a list of words that must be joined by ' '.join( ) +firstpass = ''.join([convert(*c) for c in read_by_char(argv[1])]).split(' ') +# secondpass = insert_math(''.join(firstpass).split(' ')) +# thirdpass = escape_outside_mathmode(firstpass) + +newfile = ' '.join(firstpass) with open(argv[2], 'w') as f: - f.writelines(newfile) + f.write(newfile) diff --git a/tesi/prova.md b/tesi/prova.md deleted file mode 100644 index d7f71c1..0000000 --- a/tesi/prova.md +++ /dev/null @@ -1,320 +0,0 @@ - -# TODO Scaletta [1/2] - -- [X] Abstract -- [-] Background [25%] - - [X] Ocaml - - [ ] Lambda code - - [ ] Pattern matching - - [ ] Translation Verification - - [ ] Symbolic execution - -\begin{abstract} - -This dissertation presents an algorithm for the translation validation of the OCaml -pattern matching compiler. Given the source representation of the target program and the -target program compiled in untyped lambda form, the algoritmhm is capable of modelling -the source program in terms of symbolic constraints on it's branches and apply symbolic -execution on the untyped lambda representation in order to validate wheter the compilation -produced a valid result. -In this context a valid result means that for every input in the domain of the source -program the untyped lambda translation produces the same output as the source program. -The input of the program is modelled in terms of symbolic constraints closely related to -the runtime representation of OCaml objects and the output consists of OCaml code -blackboxes that are not evaluated in the context of the verification. - -\end{abstract} - - -# 1. Background - - -## 1.1 OCaml - -Objective Caml (OCaml) is a dialect of the ML (Meta-Language) family of programming -languages. -OCaml shares many features with other dialects of ML, such as SML and Caml Light, -The main features of ML languages are the use of the Hindley-Milner type system that -provides many advantages with respect to static type systems of traditional imperative and object -oriented language such as C, C++ and Java, such as: - -- Parametric polymorphism: in certain scenarios a function can accept more than one - -type for the input parameters. For example a function that computes the lenght of a -list doesn't need to inspect the type of the elements of the list and for this reason -a List.length function can accept list of integers, list of strings and in general -list of any type. Such languages offer polymorphic functions through subtyping at -runtime only, while other languages such as C++ offer polymorphism through compile -time templates and function overloading. -With the Hindley-Milner type system each well typed function can have more than one -type but always has a unique best type, called the *principal type*. -For example the principal type of the List.length function is "For any *a*, function from -list of *a* to *int*" and *a* is called the *type parameter*. - -- Strong typing: Languages such as C and C++ allow the programmer to operate on data - -without considering its type, mainly through pointers. Other languages such as C# -and Go allow type erasure so at runtime the type of the data can't be queried. -In the case of programming languages using an Hindley-Milner type system the -programmer is not allowed to operate on data by ignoring or promoting its type. - -- Type Inference: the principal type of a well formed term can be inferred without any - -annotation or declaration. - -- Algebraic data types: types that are modelled by the use of two - - algebraic operations, sum and product. - A sum type is a type that can hold of many different types of - objects, but only one at a time. For example the sum type defined - as *A + B* can hold at any moment a value of type A or a value of - type B. Sum types are also called tagged union or variants. - A product type is a type constructed as a direct product - of multiple types and contains at any moment one instance for - every type of its operands. Product types are also called tuples - or records. Algebraic data types can be recursive - in their definition and can be combined. -Moreover ML languages are functional, meaning that functions are -treated as first class citizens and variables are immutable, -although mutable statements and imperative constructs are permitted. -In addition to that OCaml features an object system, that provides -inheritance, subtyping and dynamic binding, and modules, that -provide a way to encapsulate definitions. Modules are checked -statically and can be reificated through functors. - -1. TODO 1.2 Pattern matching [37%] - - - [ ] capisci come mettere gli esempi uno accanto all'altro - - Pattern matching is a widely adopted mechanism to interact with ADT. - C family languages provide branching on predicates through the use of - if statements and switch statements. - Pattern matching is a mechanism for destructuring and analyzing data - structures for the presence of values simbolically represented as - tokens. One common example of pattern matching is the use of regular - expressions on strings. OCaml provides pattern matching on ADT, - primitive data types. - - - [X] Esempio enum, C e Ocaml - - type color = | Red | Blue | Green - - begin match color with - | Red -> print "red" - | Blue -> print "red" - | Green -> print "red" - - OCaml provides tokens to express data destructoring - - - [X] Esempio destructor list - - - begin match list with - | [ ] -> print "empty list" - | element1 :: [ ] -> print "one element" - | element1 :: element2 :: [ ] -> print "two elements" - | head :: tail-> print "head followed by many elements" - - - [X] Esempio destructor tuples - - - begin match tuple with - | (Some _, Some _) -> print "Pair of optional types" - | (Some _, None) -> print "Pair of optional types, last null" - | (None, Some _) -> print "Pair of optional types, first null" - | (None, None) -> print "Pair of optional types, both null" - - Pattern clauses can make the use of *guards* to test predicates and - variables can be binded in scope. - - - [ ] Esempio binding e guards - - - begin match token_list with - | "switch"::var::"{"::rest -> - | "case"::":"::var::rest when is_int var -> - | "case"::":"::var::rest when is_string var -> - | "}"::[ ] -> stop () - | "}"::rest -> error "syntax error: " rest - - - [ ] Un altro esempio con destructors e tutto i lresto - - In general pattern matching on primitive and algebraic data types takes the - following form. - - - [ ] Esempio informale - - It can be described more formally through a BNF grammar. - - - [ ] BNF - - - [ ] Come funziona il pattern matching? - -2. TODO 1.2.1 Pattern matching compilation to lambda code - - - [ ] Da tabella a matrice - - Formally, pattern and values are defined as follow: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
pattern ::=Patterns
\_wildcard
xvariable
c(p1,p2,…,pnconstructor pattern
(p1| p2)or-pattern
- - - - - - - - - - - - - - - - - - - - - - -
values ::=Values
c(v1, v2, …, vn)constructor value
- - The entire pattern matching code can be represented as a clause matrix - that associates rows of patterns (pi,1, pi,2, …, pi,n) to - lambda code action lⁱ - - \begin{equation*} - (P → L) = - \begin{pmatrix} - p_{1,1} & p_{1,2} & \cdots & p_{1,n} → l₁ \\ - p_{2,1} & p_{2,2} & \cdots & p_{2,n} → l₂ \\ - \vdots & \vdots & \ddots \vdots → \vdots \\ - p_{m,1} & p_{m,2} & \cdots & p_{m,n} → lₘ - \end{pmatrix} - \end{equation*} - - Most native data types in OCaml, such as integers, tuples, lists, - records, can be seen as instances of the following definition - - type t = Nil | One of int | Cons of int * t - - that is a type *t* with three constructors that define its complete - signature. - Every constructor has an arity. Nil, a constructor of arity 0, is - called a constant constructor. - - The pattern *p* matches a value *v*, written as p ≼ v, when - one of the following rules apply - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
\_v 
xv 
(p₁ |\\ p₂)viff p₁ ≼ v or p₂ ≼ v
c(p₁, p₂, …, pₐ)c(v₁, v₂, …, vₐ)iff (p₁, p₂, …, pₐ) ≼ (v₁, v₂, …, vₐ)
(p₁, p₂, …, pₐ)(v₁, v₂, …, vₐ)iff pᵢ ≼ vᵢ ∀i ∈ [1..a]
- - We can also say that *v* is an *instance* of *p*. - - When we consider the pattern matrix P we say that the value vector - \vv{v} = (v₁, v₂, …, vᵢ) matches the line number i in P if and only if the following two - conditions are satisfied: - - - \[ p_{i,1} & p_{i,2} & \cdots & p_{i,n} \] ≼ (v₁, v₂, …, vᵢ) - - \[ ∀j < i p_{j,1} & p_{j,2} & \cdots & p_{j,n} \] ⋠ (v₁, v₂, …, vᵢ) - - We can define the following three relations with respect to patterns: - - - Patter p is less precise than pattern q, writtens p ≼ q when all - instances of q are instances of p - - Pattern p and q are equivalent, written p ≡ q, when their instances - are the same - - Patterns p and q are compatible when they share a common instance - diff --git a/tesi/tesi.pdf b/tesi/tesi.pdf new file mode 100644 index 0000000..9ebe838 Binary files /dev/null and b/tesi/tesi.pdf differ diff --git a/tesi/tesi_unicode.org b/tesi/tesi_unicode.org index 5dd2bbc..e2f29a2 100644 --- a/tesi/tesi_unicode.org +++ b/tesi/tesi_unicode.org @@ -32,8 +32,6 @@ #+LaTeX_HEADER: \usepackage{algorithm} #+LaTeX_HEADER: \usepackage{algpseudocode} #+LaTeX_HEADER: \usepackage{amsmath,amssymb,amsthm} -#+LaTeX_HEADER: \usepackage[utf8]{inputenc} -#+LaTeX_HEADER: \usepackage[T1]{fontenc} #+Latex_HEADER: \newtheorem{definition}{Definition} #+LaTeX_HEADER: \usepackage{graphicx} #+LaTeX_HEADER: \usepackage{listings} @@ -58,9 +56,9 @@ blackboxes that are not evaluated in the context of the verification. \end{abstract} -* 1. Background +* Background -** 1.1 OCaml +** OCaml Objective Caml (OCaml) is a dialect of the ML (Meta-Language) family of programming languages. OCaml shares many features with other dialects of ML, such as SML and Caml Light, @@ -68,34 +66,34 @@ The main features of ML languages are the use of the Hindley-Milner type system provides many advantages with respect to static type systems of traditional imperative and object oriented language such as C, C++ and Java, such as: - Parametric polymorphism: in certain scenarios a function can accept more than one - type for the input parameters. For example a function that computes the lenght of a - list doesn't need to inspect the type of the elements of the list and for this reason - a List.length function can accept list of integers, list of strings and in general - list of any type. Such languages offer polymorphic functions through subtyping at - runtime only, while other languages such as C++ offer polymorphism through compile - time templates and function overloading. - With the Hindley-Milner type system each well typed function can have more than one - type but always has a unique best type, called the /principal type/. - For example the principal type of the List.length function is "For any /a/, function from - list of /a/ to /int/" and /a/ is called the /type parameter/. + type for the input parameters. For example a function that computes the lenght of a + list doesn't need to inspect the type of the elements of the list and for this reason + a List.length function can accept list of integers, list of strings and in general + list of any type. Such languages offer polymorphic functions through subtyping at + runtime only, while other languages such as C++ offer polymorphism through compile + time templates and function overloading. + With the Hindley-Milner type system each well typed function can have more than one + type but always has a unique best type, called the /principal type/. + For example the principal type of the List.length function is "For any /a/, function from + list of /a/ to /int/" and /a/ is called the /type parameter/. - Strong typing: Languages such as C and C++ allow the programmer to operate on data - without considering its type, mainly through pointers. Other languages such as C# - and Go allow type erasure so at runtime the type of the data can't be queried. - In the case of programming languages using an Hindley-Milner type system the - programmer is not allowed to operate on data by ignoring or promoting its type. + without considering its type, mainly through pointers. Other languages such as C# + and Go allow type erasure so at runtime the type of the data can't be queried. + In the case of programming languages using an Hindley-Milner type system the + programmer is not allowed to operate on data by ignoring or promoting its type. - Type Inference: the principal type of a well formed term can be inferred without any - annotation or declaration. + annotation or declaration. - Algebraic data types: types that are modelled by the use of two - algebraic operations, sum and product. - A sum type is a type that can hold of many different types of - objects, but only one at a time. For example the sum type defined - as /A + B/ can hold at any moment a value of type A or a value of - type B. Sum types are also called tagged union or variants. - A product type is a type constructed as a direct product - of multiple types and contains at any moment one instance for - every type of its operands. Product types are also called tuples - or records. Algebraic data types can be recursive - in their definition and can be combined. + algebraic operations, sum and product. + A sum type is a type that can hold of many different types of + objects, but only one at a time. For example the sum type defined + as /A + B/ can hold at any moment a value of type A or a value of + type B. Sum types are also called tagged union or variants. + A product type is a type constructed as a direct product + of multiple types and contains at any moment one instance for + every type of its operands. Product types are also called tuples + or records. Algebraic data types can be recursive + in their definition and can be combined. Moreover ML languages are functional, meaning that functions are treated as first class citizens and variables are immutable, although mutable statements and imperative constructs are permitted. @@ -104,7 +102,7 @@ inheritance, subtyping and dynamic binding, and modules, that provide a way to encapsulate definitions. Modules are checked statically and can be reificated through functors. -*** TODO 1.2 Pattern matching [37%] +*** TODO Pattern matching [37%] - [ ] capisci come mettere gli esempi uno accanto all'altro Pattern matching is a widely adopted mechanism to interact with ADT. @@ -183,15 +181,15 @@ It can be described more formally through a BNF grammar. - [ ] Da tabella a matrice Formally, pattern are defined as follows: -| pattern ::= | Patterns | -|----------------+---------------------| -| _ | wildcard | -| x | variable | -| c(p₁,p₂,...,pₙ | constructor pattern | -| (p₁\vert p₂) | or-pattern | +| pattern | Patterns | +|-----------------+---------------------| +| _ | wildcard | +| x | variable | +| c(p₁,p₂,...,pₙ) | constructor pattern | +| (p₁\vert p₂) | or-pattern | Values are defined as follows: -| values ::= | Values | +| values | Values | |---------------------+-------------------| | c(v₁, v₂, ..., vₙ) | constructor value | @@ -201,10 +199,10 @@ lambda code action lⁱ \begin{equation*} (P → L) = \begin{pmatrix} -p_{1,1} & p_{1,2} & \cdots & p_{1,n} → l₁ \\ -p_{2,1} & p_{2,2} & \cdots & p_{2,n} → l₂ \\ -\vdots & \vdots & \ddots \vdots → \vdots \\ -p_{m,1} & p_{m,2} & \cdots & p_{m,n} → lₘ +p_{1,1} & p_{1,2} & \cdots & p_{1,n} & → l₁ \\ +p_{2,1} & p_{2,2} & \cdots & p_{2,n} & → l₂ \\ +\vdots & \vdots & \ddots & \vdots & → \vdots \\ +p_{m,1} & p_{m,2} & \cdots & p_{m,n} & → lₘ \end{pmatrix} \end{equation*} @@ -219,24 +217,28 @@ signature. Every constructor has an arity. Nil, a constructor of arity 0, is called a constant constructor. -The pattern /p/ matches a value /v/, written as p ≼ v, when -one of the following rules apply +The pattern /p/ matches a value /v/, written as p ≼ v, when one of the +following rules apply -| _ | ≼ | v | | -| x | ≼ | v | | +| | | | | +|--------------------+---+--------------------+-------------------------------------------| +| _ | ≼ | v | ∀v | +| x | ≼ | v | ∀v | | (p₁ \vert\ p₂) | ≼ | v | iff p₁ ≼ v or p₂ ≼ v | | c(p₁, p₂, ..., pₐ) | ≼ | c(v₁, v₂, ..., vₐ) | iff (p₁, p₂, ..., pₐ) ≼ (v₁, v₂, ..., vₐ) | | (p₁, p₂, ..., pₐ) | ≼ | (v₁, v₂, ..., vₐ) | iff pᵢ ≼ vᵢ ∀i ∈ [1..a] | +|--------------------+---+--------------------+-------------------------------------------| + We can also say that /v/ is an /instance/ of /p/. When we consider the pattern matrix P we say that the value vector -\vv{v} = (v₁, v₂, ..., vᵢ) matches the line number i in P if and only if the following two +$\vec{v}$ = (v₁, v₂, ..., vᵢ) matches the line number i in P if and only if the following two conditions are satisfied: -- \[ p_{i,1} & p_{i,2} & \cdots & p_{i,n} \] ≼ (v₁, v₂, ..., vᵢ) -- \[ ∀j < i p_{j,1} & p_{j,2} & \cdots & p_{j,n} \] ⋠ (v₁, v₂, ..., vᵢ) +- p_{i,1}, p_{i,2}, \cdots, p_{i,n} ≼ (v₁, v₂, ..., vᵢ) +- ∀j < i p_{j,1}, p_{j,2}, \cdots, p_{j,n} ⋠ (v₁, v₂, ..., vᵢ) We can define the following three relations with respect to patterns: -- Patter p is less precise than pattern q, writtens p ≼ q when all +- Patter p is less precise than pattern q, written p ≼ q, when all instances of q are instances of p - Pattern p and q are equivalent, written p ≡ q, when their instances are the same @@ -255,7 +257,7 @@ match x with #+END_SRC ocaml the initial input of the algorithm consists of a vector of variables -\vv{x} = (x₁, x₂, ..., xₙ) of size n +$\vec{x}$ = (x₁, x₂, ..., xₙ) of size n and a clause matrix P → L of width n and height m. \begin{equation*} diff --git a/tesi/tesi_unicode.tex b/tesi/tesi_unicode.tex new file mode 100644 index 0000000..83db8d4 --- /dev/null +++ b/tesi/tesi_unicode.tex @@ -0,0 +1,358 @@ +% Created 2020-02-24 Mon 14:35 +% Intended LaTeX compiler: pdflatex +\documentclass[11pt]{article} +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{graphicx} +\usepackage{grffile} +\usepackage{longtable} +\usepackage{wrapfig} +\usepackage{rotating} +\usepackage[normalem]{ulem} +\usepackage{amsmath} +\usepackage{textcomp} +\usepackage{amssymb} +\usepackage{capt-of} +\usepackage{hyperref} +\usepackage[utf8]{inputenc} +\usepackage{algorithm} +\usepackage{algpseudocode} +\usepackage{amsmath,amssymb,amsthm} +\newtheorem{definition}{Definition} +\usepackage{graphicx} +\usepackage{listings} +\usepackage{color} +\author{Francesco Mecca} +\date{} +\title{Translation Verification of the OCaml pattern matching compiler} +\hypersetup{ + pdfauthor={Francesco Mecca}, + pdftitle={Translation Verification of the OCaml pattern matching compiler}, + pdfkeywords={}, + pdfsubject={}, + pdfcreator={Emacs 26.3 (Org mode 9.1.9)}, + pdflang={English}} +\begin{document} + +\maketitle +\section{{\bfseries\sffamily TODO} Scaletta [1/2]} +\label{sec:orgd539212} +\begin{itemize} +\item[{$\boxtimes$}] Abstract +\item[{$\boxminus$}] Background [20\%] +\begin{itemize} +\item[{$\boxtimes$}] Ocaml +\item[{$\square$}] Lambda code [0\%] +\begin{itemize} +\item[{$\square$}] Untyped lambda form +\item[{$\square$}] OCaml specific instructions +\end{itemize} +\item[{$\boxminus$}] Pattern matching [50\%] +\begin{itemize} +\item[{$\boxtimes$}] Introduzione +\item[{$\square$}] Compilation to lambda form +\end{itemize} +\item[{$\square$}] Translation Verification +\item[{$\square$}] Symbolic execution +\end{itemize} +\item[{$\square$}] Translation verification of the Pattern Matching Compiler +\begin{itemize} +\item[{$\square$}] Source translation +\begin{itemize} +\item[{$\square$}] Formal Grammar +\item[{$\square$}] Compilation of source patterns +\end{itemize} +\item[{$\square$}] Target translation +\begin{itemize} +\item[{$\square$}] Formal Grammar +\item[{$\square$}] Symbolic execution of the lambda target +\end{itemize} +\item[{$\square$}] Equivalence between source and target +\end{itemize} +\begin{itemize} +\item[{$\square$}] Practical results +\end{itemize} +\end{itemize} + + +\begin{abstract} + +This dissertation presents an algorithm for the translation validation of the OCaml +pattern matching compiler. Given the source representation of the target program and the +target program compiled in untyped lambda form, the algoritmhm is capable of modelling +the source program in terms of symbolic constraints on it's branches and apply symbolic +execution on the untyped lambda representation in order to validate wheter the compilation +produced a valid result. +In this context a valid result means that for every input in the domain of the source +program the untyped lambda translation produces the same output as the source program. +The input of the program is modelled in terms of symbolic constraints closely related to +the runtime representation of OCaml objects and the output consists of OCaml code +blackboxes that are not evaluated in the context of the verification. + +\end{abstract} + +\section{Background} +\label{sec:org06597c8} + +\subsection{OCaml} +\label{sec:org8d0180f} +Objective Caml (OCaml) is a dialect of the ML (Meta-Language) family of programming +languages. +OCaml shares many features with other dialects of ML, such as SML and Caml Light, +The main features of ML languages are the use of the Hindley-Milner type system that +provides many advantages with respect to static type systems of traditional imperative and object +oriented language such as C, C++ and Java, such as: +\begin{itemize} +\item Parametric polymorphism: in certain scenarios a function can accept more than one +type for the input parameters. For example a function that computes the lenght of a +list doesn't need to inspect the type of the elements of the list and for this reason +a List.length function can accept list of integers, list of strings and in general +list of any type. Such languages offer polymorphic functions through subtyping at +runtime only, while other languages such as C++ offer polymorphism through compile +time templates and function overloading. +With the Hindley-Milner type system each well typed function can have more than one +type but always has a unique best type, called the \emph{principal type}. +For example the principal type of the List.length function is "For any \emph{a}, function from +list of \emph{a} to \emph{int}" and \emph{a} is called the \emph{type parameter}. +\item Strong typing: Languages such as C and C++ allow the programmer to operate on data +without considering its type, mainly through pointers. Other languages such as C\# +and Go allow type erasure so at runtime the type of the data can't be queried. +In the case of programming languages using an Hindley-Milner type system the +programmer is not allowed to operate on data by ignoring or promoting its type. +\item Type Inference: the principal type of a well formed term can be inferred without any +annotation or declaration. +\item Algebraic data types: types that are modelled by the use of two +algebraic operations, sum and product. +A sum type is a type that can hold of many different types of +objects, but only one at a time. For example the sum type defined +as \emph{A + B} can hold at any moment a value of type A or a value of +type B. Sum types are also called tagged union or variants. +A product type is a type constructed as a direct product +of multiple types and contains at any moment one instance for +every type of its operands. Product types are also called tuples +or records. Algebraic data types can be recursive +in their definition and can be combined. +\end{itemize} +Moreover ML languages are functional, meaning that functions are +treated as first class citizens and variables are immutable, +although mutable statements and imperative constructs are permitted. +In addition to that OCaml features an object system, that provides +inheritance, subtyping and dynamic binding, and modules, that +provide a way to encapsulate definitions. Modules are checked +statically and can be reificated through functors. + +\begin{enumerate} +\item {\bfseries\sffamily TODO} Pattern matching [37\%] +\label{sec:orgd3cffc0} +\begin{itemize} +\item[{$\square$}] capisci come mettere gli esempi uno accanto all'altro +\end{itemize} + +Pattern matching is a widely adopted mechanism to interact with ADT. +C family languages provide branching on predicates through the use of +if statements and switch statements. +Pattern matching is a mechanism for destructuring and analyzing data +structures for the presence of values simbolically represented as +tokens. One common example of pattern matching is the use of regular +expressions on strings. OCaml provides pattern matching on ADT, +primitive data types. + +\begin{itemize} +\item[{$\boxtimes$}] Esempio enum, C e Ocaml +\end{itemize} +\begin{verbatim} +type color = | Red | Blue | Green + +begin match color with +| Red -> print "red" +| Blue -> print "red" +| Green -> print "red" + +\end{verbatim} + +OCaml provides tokens to express data destructoring + + +\begin{itemize} +\item[{$\boxtimes$}] Esempio destructor list +\end{itemize} +\begin{verbatim} + +begin match list with +| [ ] -> print "empty list" +| element1 :: [ ] -> print "one element" +| element1 :: element2 :: [ ] -> print "two elements" +| head :: tail-> print "head followed by many elements" +\end{verbatim} + +\begin{itemize} +\item[{$\boxtimes$}] Esempio destructor tuples +\end{itemize} +\begin{verbatim} + +begin match tuple with +| (Some _, Some _) -> print "Pair of optional types" +| (Some _, None) -> print "Pair of optional types, last null" +| (None, Some _) -> print "Pair of optional types, first null" +| (None, None) -> print "Pair of optional types, both null" +\end{verbatim} + +Pattern clauses can make the use of \emph{guards} to test predicates and +variables can be binded in scope. + +\begin{itemize} +\item[{$\square$}] Esempio binding e guards +\end{itemize} +\begin{verbatim} + +begin match token_list with +| "switch"::var::"{"::rest -> +| "case"::":"::var::rest when is_int var -> +| "case"::":"::var::rest when is_string var -> +| "}"::[ ] -> stop () +| "}"::rest -> error "syntax error: " rest + +\end{verbatim} + +\begin{itemize} +\item[{$\square$}] Un altro esempio con destructors e tutto i lresto +\end{itemize} + +In general pattern matching on primitive and algebraic data types takes the +following form. + +\begin{itemize} +\item[{$\square$}] Esempio informale +\end{itemize} + +It can be described more formally through a BNF grammar. + +\begin{itemize} +\item[{$\square$}] BNF + +\item[{$\square$}] Come funziona il pattern matching? +\end{itemize} + +\item {\bfseries\sffamily TODO} 1.2.1 Pattern matching compilation to lambda code +\label{sec:orgc04d093} + +\begin{itemize} +\item[{$\square$}] Da tabella a matrice +\end{itemize} + +Formally, pattern are defined as follows: +\begin{center} +\begin{tabular}{ll} +pattern & Patterns\\ +\hline +\_ & wildcard\\ +x & variable\\ +c(p₁,p₂,\ldots{},pₙ) & constructor pattern\\ +(p₁\(\vert{}\) p₂) & or-pattern\\ +\end{tabular} +\end{center} + +Values are defined as follows: +\begin{center} +\begin{tabular}{ll} +values & Values\\ +\hline +c(v₁, v₂, \ldots{}, vₙ) & constructor value\\ +\end{tabular} +\end{center} + +The entire pattern matching code can be represented as a clause matrix +that associates rows of patterns (p\(_{\text{i,1}}\), p\(_{\text{i,2}}\), \ldots{}, p\(_{\text{i,n}}\)) to +lambda code action lⁱ +\begin{equation*} +(P → L) = +\begin{pmatrix} +p_{1,1} & p_{1,2} & \cdots & p_{1,n} & → l₁ \\ +p_{2,1} & p_{2,2} & \cdots & p_{2,n} & → l₂ \\ +\vdots & \vdots & \ddots & \vdots & → \vdots \\ +p_{m,1} & p_{m,2} & \cdots & p_{m,n} & → lₘ +\end{pmatrix} +\end{equation*} + +Most native data types in OCaml, such as integers, tuples, lists, +records, can be seen as instances of the following definition + +\begin{verbatim} +type t = Nil | One of int | Cons of int * t +\end{verbatim} +that is a type \emph{t} with three constructors that define its complete +signature. +Every constructor has an arity. Nil, a constructor of arity 0, is +called a constant constructor. + +The pattern \emph{p} matches a value \emph{v}, written as p ≼ v, when one of the +following rules apply + +\begin{center} +\begin{tabular}{llll} + & & & \\ +\hline +\_ & ≼ & v & ∀v\\ +x & ≼ & v & ∀v\\ +(p₁ \(\vert{}\)$\backslash$ p₂) & ≼ & v & iff p₁ ≼ v or p₂ ≼ v\\ +c(p₁, p₂, \ldots{}, pₐ) & ≼ & c(v₁, v₂, \ldots{}, vₐ) & iff (p₁, p₂, \ldots{}, pₐ) ≼ (v₁, v₂, \ldots{}, vₐ)\\ +(p₁, p₂, \ldots{}, pₐ) & ≼ & (v₁, v₂, \ldots{}, vₐ) & iff pᵢ ≼ vᵢ ∀i ∈ [1..a]\\ +\hline +\end{tabular} +\end{center} + +We can also say that \emph{v} is an \emph{instance} of \emph{p}. + +When we consider the pattern matrix P we say that the value vector +\(\vec{v}\) = (v₁, v₂, \ldots{}, vᵢ) matches the line number i in P if and only if the following two +conditions are satisfied: +\begin{itemize} +\item p\(_{\text{i,1}}\), p\(_{\text{i,2}}\), \(\cdots{}\), p\(_{\text{i,n}}\) ≼ (v₁, v₂, \ldots{}, vᵢ) +\item ∀j < i p\(_{\text{j,1}}\), p\(_{\text{j,2}}\), \(\cdots{}\), p\(_{\text{j,n}}\) ⋠ (v₁, v₂, \ldots{}, vᵢ) +\end{itemize} + +We can define the following three relations with respect to patterns: +\begin{itemize} +\item Patter p is less precise than pattern q, written p ≼ q, when all +instances of q are instances of p +\item Pattern p and q are equivalent, written p ≡ q, when their instances +are the same +\item Patterns p and q are compatible when they share a common instance +\end{itemize} +\end{enumerate} + +\subsection{1.2.1.1 Initial state of the compilation} +\label{sec:orgc758fe3} + +Given a source of the following form: + +\#+BEGIN\_SRC ocaml +match x with +\begin{center} +\begin{tabular}{l} +p₁ -> e₁\\ +p₂ -> e₂\\ +\end{tabular} +\end{center} +\ldots{} +\begin{center} +\begin{tabular}{l} +pₘ -> eₘ\\ +\end{tabular} +\end{center} +\#+END\_SRC ocaml + +the initial input of the algorithm consists of a vector of variables +\(\vec{x}\) = (x₁, x₂, \ldots{}, xₙ) of size n +and a clause matrix P → L of width n and height m. + +\begin{equation*} +(P → L) = +\begin{pmatrix} +p_{1,1} & p_{1,2} & \cdots & p_{1,n} → l₁ \\ +p_{2,1} & p_{2,2} & \cdots & p_{2,n} → l₂ \\ +\vdots & \vdots & \ddots \vdots → \vdots \\ +p_{m,1} & p_{m,2} & \cdots & p_{m,n} → lₘ +\end{pmatrix} +\end{equation*} +\end{document} diff --git a/tesi/unicode-latex.json b/tesi/unicode-latex.json index 6cd9664..9a49ee8 100644 --- a/tesi/unicode-latex.json +++ b/tesi/unicode-latex.json @@ -521,7 +521,7 @@ "⋝":"\\eqgtr", "⋞":"\\curlyeqprec", "⋟":"\\curlyeqsucc", -"⋠":"\\npreccurlyeq", +"⋠":"\\npreceq", "⋡":"\\nsucccurlyeq", "⋢":"\\nsqsubseteq", "⋣":"\\nsqsupseteq",