script conversione
This commit is contained in:
parent
3e96fafb7f
commit
ae930205b7
8 changed files with 727 additions and 380 deletions
278
tesi/.gitignore
vendored
Normal file
278
tesi/.gitignore
vendored
Normal file
|
@ -0,0 +1,278 @@
|
||||||
|
tesi.tex
|
||||||
|
|
||||||
|
## Core latex/pdflatex auxiliary files:
|
||||||
|
*.aux
|
||||||
|
*.lof
|
||||||
|
*.log
|
||||||
|
*.lot
|
||||||
|
*.fls
|
||||||
|
*.out
|
||||||
|
*.toc
|
||||||
|
*.fmt
|
||||||
|
*.fot
|
||||||
|
*.cb
|
||||||
|
*.cb2
|
||||||
|
.*.lb
|
||||||
|
|
||||||
|
## Intermediate documents:
|
||||||
|
*.dvi
|
||||||
|
*.xdv
|
||||||
|
*-converted-to.*
|
||||||
|
# these rules might exclude image files for figures etc.
|
||||||
|
# *.ps
|
||||||
|
# *.eps
|
||||||
|
# *.pdf
|
||||||
|
|
||||||
|
## Generated if empty string is given at "Please type another file name for output:"
|
||||||
|
.pdf
|
||||||
|
|
||||||
|
## Bibliography auxiliary files (bibtex/biblatex/biber):
|
||||||
|
*.bbl
|
||||||
|
*.bcf
|
||||||
|
*.blg
|
||||||
|
*-blx.aux
|
||||||
|
*-blx.bib
|
||||||
|
*.run.xml
|
||||||
|
|
||||||
|
## Build tool auxiliary files:
|
||||||
|
*.fdb_latexmk
|
||||||
|
*.synctex
|
||||||
|
*.synctex(busy)
|
||||||
|
*.synctex.gz
|
||||||
|
*.synctex.gz(busy)
|
||||||
|
*.pdfsync
|
||||||
|
|
||||||
|
## Build tool directories for auxiliary files
|
||||||
|
# latexrun
|
||||||
|
latex.out/
|
||||||
|
|
||||||
|
## Auxiliary and intermediate files from other packages:
|
||||||
|
# algorithms
|
||||||
|
*.alg
|
||||||
|
*.loa
|
||||||
|
|
||||||
|
# achemso
|
||||||
|
acs-*.bib
|
||||||
|
|
||||||
|
# amsthm
|
||||||
|
*.thm
|
||||||
|
|
||||||
|
# beamer
|
||||||
|
*.nav
|
||||||
|
*.pre
|
||||||
|
*.snm
|
||||||
|
*.vrb
|
||||||
|
|
||||||
|
# changes
|
||||||
|
*.soc
|
||||||
|
|
||||||
|
# comment
|
||||||
|
*.cut
|
||||||
|
|
||||||
|
# cprotect
|
||||||
|
*.cpt
|
||||||
|
|
||||||
|
# elsarticle (documentclass of Elsevier journals)
|
||||||
|
*.spl
|
||||||
|
|
||||||
|
# endnotes
|
||||||
|
*.ent
|
||||||
|
|
||||||
|
# fixme
|
||||||
|
*.lox
|
||||||
|
|
||||||
|
# feynmf/feynmp
|
||||||
|
*.mf
|
||||||
|
*.mp
|
||||||
|
*.t[1-9]
|
||||||
|
*.t[1-9][0-9]
|
||||||
|
*.tfm
|
||||||
|
|
||||||
|
#(r)(e)ledmac/(r)(e)ledpar
|
||||||
|
*.end
|
||||||
|
*.?end
|
||||||
|
*.[1-9]
|
||||||
|
*.[1-9][0-9]
|
||||||
|
*.[1-9][0-9][0-9]
|
||||||
|
*.[1-9]R
|
||||||
|
*.[1-9][0-9]R
|
||||||
|
*.[1-9][0-9][0-9]R
|
||||||
|
*.eledsec[1-9]
|
||||||
|
*.eledsec[1-9]R
|
||||||
|
*.eledsec[1-9][0-9]
|
||||||
|
*.eledsec[1-9][0-9]R
|
||||||
|
*.eledsec[1-9][0-9][0-9]
|
||||||
|
*.eledsec[1-9][0-9][0-9]R
|
||||||
|
|
||||||
|
# glossaries
|
||||||
|
*.acn
|
||||||
|
*.acr
|
||||||
|
*.glg
|
||||||
|
*.glo
|
||||||
|
*.gls
|
||||||
|
*.glsdefs
|
||||||
|
*.lzo
|
||||||
|
*.lzs
|
||||||
|
|
||||||
|
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
|
||||||
|
# *.ist
|
||||||
|
|
||||||
|
# gnuplottex
|
||||||
|
*-gnuplottex-*
|
||||||
|
|
||||||
|
# gregoriotex
|
||||||
|
*.gaux
|
||||||
|
*.gtex
|
||||||
|
|
||||||
|
# htlatex
|
||||||
|
*.4ct
|
||||||
|
*.4tc
|
||||||
|
*.idv
|
||||||
|
*.lg
|
||||||
|
*.trc
|
||||||
|
*.xref
|
||||||
|
|
||||||
|
# hyperref
|
||||||
|
*.brf
|
||||||
|
|
||||||
|
# knitr
|
||||||
|
*-concordance.tex
|
||||||
|
# TODO Comment the next line if you want to keep your tikz graphics files
|
||||||
|
*.tikz
|
||||||
|
*-tikzDictionary
|
||||||
|
|
||||||
|
# listings
|
||||||
|
*.lol
|
||||||
|
|
||||||
|
# luatexja-ruby
|
||||||
|
*.ltjruby
|
||||||
|
|
||||||
|
# makeidx
|
||||||
|
*.idx
|
||||||
|
*.ilg
|
||||||
|
*.ind
|
||||||
|
|
||||||
|
# minitoc
|
||||||
|
*.maf
|
||||||
|
*.mlf
|
||||||
|
*.mlt
|
||||||
|
*.mtc[0-9]*
|
||||||
|
*.slf[0-9]*
|
||||||
|
*.slt[0-9]*
|
||||||
|
*.stc[0-9]*
|
||||||
|
|
||||||
|
# minted
|
||||||
|
_minted*
|
||||||
|
*.pyg
|
||||||
|
|
||||||
|
# morewrites
|
||||||
|
*.mw
|
||||||
|
|
||||||
|
# nomencl
|
||||||
|
*.nlg
|
||||||
|
*.nlo
|
||||||
|
*.nls
|
||||||
|
|
||||||
|
# pax
|
||||||
|
*.pax
|
||||||
|
|
||||||
|
# pdfpcnotes
|
||||||
|
*.pdfpc
|
||||||
|
|
||||||
|
# sagetex
|
||||||
|
*.sagetex.sage
|
||||||
|
*.sagetex.py
|
||||||
|
*.sagetex.scmd
|
||||||
|
|
||||||
|
# scrwfile
|
||||||
|
*.wrt
|
||||||
|
|
||||||
|
# sympy
|
||||||
|
*.sout
|
||||||
|
*.sympy
|
||||||
|
sympy-plots-for-*.tex/
|
||||||
|
|
||||||
|
# pdfcomment
|
||||||
|
*.upa
|
||||||
|
*.upb
|
||||||
|
|
||||||
|
# pythontex
|
||||||
|
*.pytxcode
|
||||||
|
pythontex-files-*/
|
||||||
|
|
||||||
|
# tcolorbox
|
||||||
|
*.listing
|
||||||
|
|
||||||
|
# thmtools
|
||||||
|
*.loe
|
||||||
|
|
||||||
|
# TikZ & PGF
|
||||||
|
*.dpth
|
||||||
|
*.md5
|
||||||
|
*.auxlock
|
||||||
|
|
||||||
|
# todonotes
|
||||||
|
*.tdo
|
||||||
|
|
||||||
|
# vhistory
|
||||||
|
*.hst
|
||||||
|
*.ver
|
||||||
|
|
||||||
|
# easy-todo
|
||||||
|
*.lod
|
||||||
|
|
||||||
|
# xcolor
|
||||||
|
*.xcp
|
||||||
|
|
||||||
|
# xmpincl
|
||||||
|
*.xmpi
|
||||||
|
|
||||||
|
# xindy
|
||||||
|
*.xdy
|
||||||
|
|
||||||
|
# xypic precompiled matrices and outlines
|
||||||
|
*.xyc
|
||||||
|
*.xyd
|
||||||
|
|
||||||
|
# endfloat
|
||||||
|
*.ttt
|
||||||
|
*.fff
|
||||||
|
|
||||||
|
# Latexian
|
||||||
|
TSWLatexianTemp*
|
||||||
|
|
||||||
|
## Editors:
|
||||||
|
# WinEdt
|
||||||
|
*.bak
|
||||||
|
*.sav
|
||||||
|
|
||||||
|
# Texpad
|
||||||
|
.texpadtmp
|
||||||
|
|
||||||
|
# LyX
|
||||||
|
*.lyx~
|
||||||
|
|
||||||
|
# Kile
|
||||||
|
*.backup
|
||||||
|
|
||||||
|
# gummi
|
||||||
|
.*.swp
|
||||||
|
|
||||||
|
# KBibTeX
|
||||||
|
*~[0-9]*
|
||||||
|
|
||||||
|
# TeXnicCenter
|
||||||
|
*.tps
|
||||||
|
|
||||||
|
# auto folder when using emacs and auctex
|
||||||
|
./auto/*
|
||||||
|
*.el
|
||||||
|
|
||||||
|
# expex forward references with \gathertags
|
||||||
|
*-tags.tex
|
||||||
|
|
||||||
|
# standalone packages
|
||||||
|
*.sta
|
||||||
|
|
||||||
|
# Makeindex log files
|
||||||
|
*.lpz
|
|
@ -1,6 +1,6 @@
|
||||||
SRC = tesi.tex
|
SRC = tesi.tex
|
||||||
AUX = tesi.aux
|
AUX = tesi.aux
|
||||||
DEL = tesi.aux tesi.bbl tesi.blg tesi.log tesi.out
|
DEL = tesi.aux tesi.bbl tesi.blg tesi.log tesi.out tesi_unicode.tex tesi.pdf tesi.tex texput.log
|
||||||
NAME = tesi
|
NAME = tesi
|
||||||
|
|
||||||
tesi:
|
tesi:
|
||||||
|
@ -9,9 +9,14 @@ tesi:
|
||||||
rm -f $(DEL)
|
rm -f $(DEL)
|
||||||
@echo
|
@echo
|
||||||
@echo "=== Building from scratch ==="
|
@echo "=== Building from scratch ==="
|
||||||
|
emacs tesi_unicode.org -f org-latex-export-to-latex --kill
|
||||||
|
python3 conv.py tesi_unicode.tex tesi.tex
|
||||||
pdflatex $(SRC)
|
pdflatex $(SRC)
|
||||||
bibtex $(AUX)
|
bibtex $(AUX)
|
||||||
pdflatex $(SRC)
|
pdflatex $(SRC)
|
||||||
pdflatex $(SRC)
|
pdflatex $(SRC)
|
||||||
@echo
|
@echo
|
||||||
@echo "=== All done. Generated $(NAME).pdf ==="
|
@echo "=== All done. Generated $(NAME).pdf ==="
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f $(DEL)
|
||||||
|
|
40
tesi/conv.py
40
tesi/conv.py
|
@ -1,21 +1,45 @@
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
from sys import argv
|
from sys import argv
|
||||||
|
|
||||||
allsymbols = json.load(open('./unicode-latex.json'))
|
allsymbols = json.load(open('./unicode-latex.json'))
|
||||||
symbols = ['≼', '→', '⊀', '⋠', '≺', '∀', '∈', '₂', '₁', 'ₐ', 'ₘ', 'ₙ', 'ᵢ' ]
|
mysymbols = ['≡', '≼', '→', '⊀', '⋠', '≺', '∀', '∈', '₂', '₁', 'ₐ', 'ₘ', 'ₙ', 'ᵢ', 'ⁱ']
|
||||||
symbols = {s: allsymbols[s] for s in symbols}
|
|
||||||
print(symbols)
|
|
||||||
|
|
||||||
|
symbols = {s: allsymbols[s] for s in mysymbols}
|
||||||
|
mathsymbols = {s: '$'+allsymbols[s]+'$' for s in symbols}
|
||||||
|
|
||||||
def read_by_char(fname):
|
def read_by_char(fname):
|
||||||
|
# Yield character and True/False if inside mathmode block
|
||||||
|
mathmode = False
|
||||||
|
mathmode_begin = set(['\\begin{equation*}', '\\begin{equation}'])
|
||||||
|
mathmode_end = set(['\\end{equation*}', '\\end{equation}'])
|
||||||
|
cnt = 0
|
||||||
with open(fname, 'r') as fp:
|
with open(fname, 'r') as fp:
|
||||||
for line in fp.readlines():
|
for line in fp.readlines():
|
||||||
|
cnt += 1
|
||||||
|
words = [w.strip() for w in line.split(' ')]
|
||||||
|
if mathmode_begin.intersection(words):
|
||||||
|
assert mathmode == False
|
||||||
|
mathmode = True
|
||||||
|
elif mathmode_end.intersection(words):
|
||||||
|
assert mathmode == True, f'Line: {words}, number: {cnt}'
|
||||||
|
mathmode = False
|
||||||
|
|
||||||
for ch in line:
|
for ch in line:
|
||||||
yield ch
|
yield ch, mathmode
|
||||||
|
|
||||||
def convert(ch):
|
def convert(ch, mathmode):
|
||||||
return symbols[ch] if ch in symbols else ch
|
if not mathmode:
|
||||||
|
return mathsymbols[ch] if ch in mathsymbols else ch
|
||||||
|
else:
|
||||||
|
return symbols[ch] if ch in symbols else ch
|
||||||
|
|
||||||
newfile = [convert(ch) for ch in read_by_char(argv[1])]
|
# convert symbols except the one requiring math mode modifiers
|
||||||
|
# all passes produces a list of words that must be joined by ' '.join( )
|
||||||
|
firstpass = ''.join([convert(*c) for c in read_by_char(argv[1])]).split(' ')
|
||||||
|
# secondpass = insert_math(''.join(firstpass).split(' '))
|
||||||
|
# thirdpass = escape_outside_mathmode(firstpass)
|
||||||
|
|
||||||
|
newfile = ' '.join(firstpass)
|
||||||
with open(argv[2], 'w') as f:
|
with open(argv[2], 'w') as f:
|
||||||
f.writelines(newfile)
|
f.write(newfile)
|
||||||
|
|
320
tesi/prova.md
320
tesi/prova.md
|
@ -1,320 +0,0 @@
|
||||||
|
|
||||||
# TODO Scaletta <code>[1/2]</code>
|
|
||||||
|
|
||||||
- [X] Abstract
|
|
||||||
- [-] Background <code>[25%]</code>
|
|
||||||
- [X] Ocaml
|
|
||||||
- [ ] Lambda code
|
|
||||||
- [ ] Pattern matching
|
|
||||||
- [ ] Translation Verification
|
|
||||||
- [ ] Symbolic execution
|
|
||||||
|
|
||||||
\begin{abstract}
|
|
||||||
|
|
||||||
This dissertation presents an algorithm for the translation validation of the OCaml
|
|
||||||
pattern matching compiler. Given the source representation of the target program and the
|
|
||||||
target program compiled in untyped lambda form, the algoritmhm is capable of modelling
|
|
||||||
the source program in terms of symbolic constraints on it's branches and apply symbolic
|
|
||||||
execution on the untyped lambda representation in order to validate wheter the compilation
|
|
||||||
produced a valid result.
|
|
||||||
In this context a valid result means that for every input in the domain of the source
|
|
||||||
program the untyped lambda translation produces the same output as the source program.
|
|
||||||
The input of the program is modelled in terms of symbolic constraints closely related to
|
|
||||||
the runtime representation of OCaml objects and the output consists of OCaml code
|
|
||||||
blackboxes that are not evaluated in the context of the verification.
|
|
||||||
|
|
||||||
\end{abstract}
|
|
||||||
|
|
||||||
|
|
||||||
# 1. Background
|
|
||||||
|
|
||||||
|
|
||||||
## 1.1 OCaml
|
|
||||||
|
|
||||||
Objective Caml (OCaml) is a dialect of the ML (Meta-Language) family of programming
|
|
||||||
languages.
|
|
||||||
OCaml shares many features with other dialects of ML, such as SML and Caml Light,
|
|
||||||
The main features of ML languages are the use of the Hindley-Milner type system that
|
|
||||||
provides many advantages with respect to static type systems of traditional imperative and object
|
|
||||||
oriented language such as C, C++ and Java, such as:
|
|
||||||
|
|
||||||
- Parametric polymorphism: in certain scenarios a function can accept more than one
|
|
||||||
|
|
||||||
type for the input parameters. For example a function that computes the lenght of a
|
|
||||||
list doesn't need to inspect the type of the elements of the list and for this reason
|
|
||||||
a List.length function can accept list of integers, list of strings and in general
|
|
||||||
list of any type. Such languages offer polymorphic functions through subtyping at
|
|
||||||
runtime only, while other languages such as C++ offer polymorphism through compile
|
|
||||||
time templates and function overloading.
|
|
||||||
With the Hindley-Milner type system each well typed function can have more than one
|
|
||||||
type but always has a unique best type, called the *principal type*.
|
|
||||||
For example the principal type of the List.length function is "For any *a*, function from
|
|
||||||
list of *a* to *int*" and *a* is called the *type parameter*.
|
|
||||||
|
|
||||||
- Strong typing: Languages such as C and C++ allow the programmer to operate on data
|
|
||||||
|
|
||||||
without considering its type, mainly through pointers. Other languages such as C#
|
|
||||||
and Go allow type erasure so at runtime the type of the data can't be queried.
|
|
||||||
In the case of programming languages using an Hindley-Milner type system the
|
|
||||||
programmer is not allowed to operate on data by ignoring or promoting its type.
|
|
||||||
|
|
||||||
- Type Inference: the principal type of a well formed term can be inferred without any
|
|
||||||
|
|
||||||
annotation or declaration.
|
|
||||||
|
|
||||||
- Algebraic data types: types that are modelled by the use of two
|
|
||||||
|
|
||||||
algebraic operations, sum and product.
|
|
||||||
A sum type is a type that can hold of many different types of
|
|
||||||
objects, but only one at a time. For example the sum type defined
|
|
||||||
as *A + B* can hold at any moment a value of type A or a value of
|
|
||||||
type B. Sum types are also called tagged union or variants.
|
|
||||||
A product type is a type constructed as a direct product
|
|
||||||
of multiple types and contains at any moment one instance for
|
|
||||||
every type of its operands. Product types are also called tuples
|
|
||||||
or records. Algebraic data types can be recursive
|
|
||||||
in their definition and can be combined.
|
|
||||||
Moreover ML languages are functional, meaning that functions are
|
|
||||||
treated as first class citizens and variables are immutable,
|
|
||||||
although mutable statements and imperative constructs are permitted.
|
|
||||||
In addition to that OCaml features an object system, that provides
|
|
||||||
inheritance, subtyping and dynamic binding, and modules, that
|
|
||||||
provide a way to encapsulate definitions. Modules are checked
|
|
||||||
statically and can be reificated through functors.
|
|
||||||
|
|
||||||
1. TODO 1.2 Pattern matching <code>[37%]</code>
|
|
||||||
|
|
||||||
- [ ] capisci come mettere gli esempi uno accanto all'altro
|
|
||||||
|
|
||||||
Pattern matching is a widely adopted mechanism to interact with ADT.
|
|
||||||
C family languages provide branching on predicates through the use of
|
|
||||||
if statements and switch statements.
|
|
||||||
Pattern matching is a mechanism for destructuring and analyzing data
|
|
||||||
structures for the presence of values simbolically represented as
|
|
||||||
tokens. One common example of pattern matching is the use of regular
|
|
||||||
expressions on strings. OCaml provides pattern matching on ADT,
|
|
||||||
primitive data types.
|
|
||||||
|
|
||||||
- [X] Esempio enum, C e Ocaml
|
|
||||||
|
|
||||||
type color = | Red | Blue | Green
|
|
||||||
|
|
||||||
begin match color with
|
|
||||||
| Red -> print "red"
|
|
||||||
| Blue -> print "red"
|
|
||||||
| Green -> print "red"
|
|
||||||
|
|
||||||
OCaml provides tokens to express data destructoring
|
|
||||||
|
|
||||||
- [X] Esempio destructor list
|
|
||||||
|
|
||||||
|
|
||||||
begin match list with
|
|
||||||
| [ ] -> print "empty list"
|
|
||||||
| element1 :: [ ] -> print "one element"
|
|
||||||
| element1 :: element2 :: [ ] -> print "two elements"
|
|
||||||
| head :: tail-> print "head followed by many elements"
|
|
||||||
|
|
||||||
- [X] Esempio destructor tuples
|
|
||||||
|
|
||||||
|
|
||||||
begin match tuple with
|
|
||||||
| (Some _, Some _) -> print "Pair of optional types"
|
|
||||||
| (Some _, None) -> print "Pair of optional types, last null"
|
|
||||||
| (None, Some _) -> print "Pair of optional types, first null"
|
|
||||||
| (None, None) -> print "Pair of optional types, both null"
|
|
||||||
|
|
||||||
Pattern clauses can make the use of *guards* to test predicates and
|
|
||||||
variables can be binded in scope.
|
|
||||||
|
|
||||||
- [ ] Esempio binding e guards
|
|
||||||
|
|
||||||
|
|
||||||
begin match token_list with
|
|
||||||
| "switch"::var::"{"::rest ->
|
|
||||||
| "case"::":"::var::rest when is_int var ->
|
|
||||||
| "case"::":"::var::rest when is_string var ->
|
|
||||||
| "}"::[ ] -> stop ()
|
|
||||||
| "}"::rest -> error "syntax error: " rest
|
|
||||||
|
|
||||||
- [ ] Un altro esempio con destructors e tutto i lresto
|
|
||||||
|
|
||||||
In general pattern matching on primitive and algebraic data types takes the
|
|
||||||
following form.
|
|
||||||
|
|
||||||
- [ ] Esempio informale
|
|
||||||
|
|
||||||
It can be described more formally through a BNF grammar.
|
|
||||||
|
|
||||||
- [ ] BNF
|
|
||||||
|
|
||||||
- [ ] Come funziona il pattern matching?
|
|
||||||
|
|
||||||
2. TODO 1.2.1 Pattern matching compilation to lambda code
|
|
||||||
|
|
||||||
- [ ] Da tabella a matrice
|
|
||||||
|
|
||||||
Formally, pattern and values are defined as follow:
|
|
||||||
|
|
||||||
<table border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
||||||
|
|
||||||
|
|
||||||
<colgroup>
|
|
||||||
<col class="org-left" />
|
|
||||||
|
|
||||||
<col class="org-left" />
|
|
||||||
</colgroup>
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th scope="col" class="org-left">pattern ::=</th>
|
|
||||||
<th scope="col" class="org-left">Patterns</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td class="org-left">\_</td>
|
|
||||||
<td class="org-left">wildcard</td>
|
|
||||||
</tr>
|
|
||||||
|
|
||||||
|
|
||||||
<tr>
|
|
||||||
<td class="org-left">x</td>
|
|
||||||
<td class="org-left">variable</td>
|
|
||||||
</tr>
|
|
||||||
|
|
||||||
|
|
||||||
<tr>
|
|
||||||
<td class="org-left">c(p1,p2,…,pn</td>
|
|
||||||
<td class="org-left">constructor pattern</td>
|
|
||||||
</tr>
|
|
||||||
|
|
||||||
|
|
||||||
<tr>
|
|
||||||
<td class="org-left">(p1| p2)</td>
|
|
||||||
<td class="org-left">or-pattern</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<table border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
||||||
|
|
||||||
|
|
||||||
<colgroup>
|
|
||||||
<col class="org-left" />
|
|
||||||
|
|
||||||
<col class="org-left" />
|
|
||||||
</colgroup>
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th scope="col" class="org-left">values ::=</th>
|
|
||||||
<th scope="col" class="org-left">Values</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td class="org-left">c(v1, v2, …, vn)</td>
|
|
||||||
<td class="org-left">constructor value</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
The entire pattern matching code can be represented as a clause matrix
|
|
||||||
that associates rows of patterns (p<sub>i,1</sub>, p<sub>i,2</sub>, …, p<sub>i,n</sub>) to
|
|
||||||
lambda code action lⁱ
|
|
||||||
|
|
||||||
\begin{equation*}
|
|
||||||
(P → L) =
|
|
||||||
\begin{pmatrix}
|
|
||||||
p_{1,1} & p_{1,2} & \cdots & p_{1,n} → l₁ \\
|
|
||||||
p_{2,1} & p_{2,2} & \cdots & p_{2,n} → l₂ \\
|
|
||||||
\vdots & \vdots & \ddots \vdots → \vdots \\
|
|
||||||
p_{m,1} & p_{m,2} & \cdots & p_{m,n} → lₘ
|
|
||||||
\end{pmatrix}
|
|
||||||
\end{equation*}
|
|
||||||
|
|
||||||
Most native data types in OCaml, such as integers, tuples, lists,
|
|
||||||
records, can be seen as instances of the following definition
|
|
||||||
|
|
||||||
type t = Nil | One of int | Cons of int * t
|
|
||||||
|
|
||||||
that is a type *t* with three constructors that define its complete
|
|
||||||
signature.
|
|
||||||
Every constructor has an arity. Nil, a constructor of arity 0, is
|
|
||||||
called a constant constructor.
|
|
||||||
|
|
||||||
The pattern *p* matches a value *v*, written as p ≼ v, when
|
|
||||||
one of the following rules apply
|
|
||||||
|
|
||||||
<table border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
||||||
|
|
||||||
|
|
||||||
<colgroup>
|
|
||||||
<col class="org-left" />
|
|
||||||
|
|
||||||
<col class="org-left" />
|
|
||||||
|
|
||||||
<col class="org-left" />
|
|
||||||
|
|
||||||
<col class="org-left" />
|
|
||||||
</colgroup>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td class="org-left">\_</td>
|
|
||||||
<td class="org-left">≼</td>
|
|
||||||
<td class="org-left">v</td>
|
|
||||||
<td class="org-left"> </td>
|
|
||||||
</tr>
|
|
||||||
|
|
||||||
|
|
||||||
<tr>
|
|
||||||
<td class="org-left">x</td>
|
|
||||||
<td class="org-left">≼</td>
|
|
||||||
<td class="org-left">v</td>
|
|
||||||
<td class="org-left"> </td>
|
|
||||||
</tr>
|
|
||||||
|
|
||||||
|
|
||||||
<tr>
|
|
||||||
<td class="org-left">(p₁ |\\ p₂)</td>
|
|
||||||
<td class="org-left">≼</td>
|
|
||||||
<td class="org-left">v</td>
|
|
||||||
<td class="org-left">iff p₁ ≼ v or p₂ ≼ v</td>
|
|
||||||
</tr>
|
|
||||||
|
|
||||||
|
|
||||||
<tr>
|
|
||||||
<td class="org-left">c(p₁, p₂, …, pₐ)</td>
|
|
||||||
<td class="org-left">≼</td>
|
|
||||||
<td class="org-left">c(v₁, v₂, …, vₐ)</td>
|
|
||||||
<td class="org-left">iff (p₁, p₂, …, pₐ) ≼ (v₁, v₂, …, vₐ)</td>
|
|
||||||
</tr>
|
|
||||||
|
|
||||||
|
|
||||||
<tr>
|
|
||||||
<td class="org-left">(p₁, p₂, …, pₐ)</td>
|
|
||||||
<td class="org-left">≼</td>
|
|
||||||
<td class="org-left">(v₁, v₂, …, vₐ)</td>
|
|
||||||
<td class="org-left">iff pᵢ ≼ vᵢ ∀i ∈ [1..a]</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
We can also say that *v* is an *instance* of *p*.
|
|
||||||
|
|
||||||
When we consider the pattern matrix P we say that the value vector
|
|
||||||
\vv{v} = (v₁, v₂, …, vᵢ) matches the line number i in P if and only if the following two
|
|
||||||
conditions are satisfied:
|
|
||||||
|
|
||||||
- \[ p_{i,1} & p_{i,2} & \cdots & p_{i,n} \] ≼ (v₁, v₂, …, vᵢ)
|
|
||||||
- \[ ∀j < i p_{j,1} & p_{j,2} & \cdots & p_{j,n} \] ⋠ (v₁, v₂, …, vᵢ)
|
|
||||||
|
|
||||||
We can define the following three relations with respect to patterns:
|
|
||||||
|
|
||||||
- Patter p is less precise than pattern q, writtens p ≼ q when all
|
|
||||||
instances of q are instances of p
|
|
||||||
- Pattern p and q are equivalent, written p ≡ q, when their instances
|
|
||||||
are the same
|
|
||||||
- Patterns p and q are compatible when they share a common instance
|
|
||||||
|
|
BIN
tesi/tesi.pdf
Normal file
BIN
tesi/tesi.pdf
Normal file
Binary file not shown.
|
@ -32,8 +32,6 @@
|
||||||
#+LaTeX_HEADER: \usepackage{algorithm}
|
#+LaTeX_HEADER: \usepackage{algorithm}
|
||||||
#+LaTeX_HEADER: \usepackage{algpseudocode}
|
#+LaTeX_HEADER: \usepackage{algpseudocode}
|
||||||
#+LaTeX_HEADER: \usepackage{amsmath,amssymb,amsthm}
|
#+LaTeX_HEADER: \usepackage{amsmath,amssymb,amsthm}
|
||||||
#+LaTeX_HEADER: \usepackage[utf8]{inputenc}
|
|
||||||
#+LaTeX_HEADER: \usepackage[T1]{fontenc}
|
|
||||||
#+Latex_HEADER: \newtheorem{definition}{Definition}
|
#+Latex_HEADER: \newtheorem{definition}{Definition}
|
||||||
#+LaTeX_HEADER: \usepackage{graphicx}
|
#+LaTeX_HEADER: \usepackage{graphicx}
|
||||||
#+LaTeX_HEADER: \usepackage{listings}
|
#+LaTeX_HEADER: \usepackage{listings}
|
||||||
|
@ -58,9 +56,9 @@ blackboxes that are not evaluated in the context of the verification.
|
||||||
|
|
||||||
\end{abstract}
|
\end{abstract}
|
||||||
|
|
||||||
* 1. Background
|
* Background
|
||||||
|
|
||||||
** 1.1 OCaml
|
** OCaml
|
||||||
Objective Caml (OCaml) is a dialect of the ML (Meta-Language) family of programming
|
Objective Caml (OCaml) is a dialect of the ML (Meta-Language) family of programming
|
||||||
languages.
|
languages.
|
||||||
OCaml shares many features with other dialects of ML, such as SML and Caml Light,
|
OCaml shares many features with other dialects of ML, such as SML and Caml Light,
|
||||||
|
@ -68,34 +66,34 @@ The main features of ML languages are the use of the Hindley-Milner type system
|
||||||
provides many advantages with respect to static type systems of traditional imperative and object
|
provides many advantages with respect to static type systems of traditional imperative and object
|
||||||
oriented language such as C, C++ and Java, such as:
|
oriented language such as C, C++ and Java, such as:
|
||||||
- Parametric polymorphism: in certain scenarios a function can accept more than one
|
- Parametric polymorphism: in certain scenarios a function can accept more than one
|
||||||
type for the input parameters. For example a function that computes the lenght of a
|
type for the input parameters. For example a function that computes the lenght of a
|
||||||
list doesn't need to inspect the type of the elements of the list and for this reason
|
list doesn't need to inspect the type of the elements of the list and for this reason
|
||||||
a List.length function can accept list of integers, list of strings and in general
|
a List.length function can accept list of integers, list of strings and in general
|
||||||
list of any type. Such languages offer polymorphic functions through subtyping at
|
list of any type. Such languages offer polymorphic functions through subtyping at
|
||||||
runtime only, while other languages such as C++ offer polymorphism through compile
|
runtime only, while other languages such as C++ offer polymorphism through compile
|
||||||
time templates and function overloading.
|
time templates and function overloading.
|
||||||
With the Hindley-Milner type system each well typed function can have more than one
|
With the Hindley-Milner type system each well typed function can have more than one
|
||||||
type but always has a unique best type, called the /principal type/.
|
type but always has a unique best type, called the /principal type/.
|
||||||
For example the principal type of the List.length function is "For any /a/, function from
|
For example the principal type of the List.length function is "For any /a/, function from
|
||||||
list of /a/ to /int/" and /a/ is called the /type parameter/.
|
list of /a/ to /int/" and /a/ is called the /type parameter/.
|
||||||
- Strong typing: Languages such as C and C++ allow the programmer to operate on data
|
- Strong typing: Languages such as C and C++ allow the programmer to operate on data
|
||||||
without considering its type, mainly through pointers. Other languages such as C#
|
without considering its type, mainly through pointers. Other languages such as C#
|
||||||
and Go allow type erasure so at runtime the type of the data can't be queried.
|
and Go allow type erasure so at runtime the type of the data can't be queried.
|
||||||
In the case of programming languages using an Hindley-Milner type system the
|
In the case of programming languages using an Hindley-Milner type system the
|
||||||
programmer is not allowed to operate on data by ignoring or promoting its type.
|
programmer is not allowed to operate on data by ignoring or promoting its type.
|
||||||
- Type Inference: the principal type of a well formed term can be inferred without any
|
- Type Inference: the principal type of a well formed term can be inferred without any
|
||||||
annotation or declaration.
|
annotation or declaration.
|
||||||
- Algebraic data types: types that are modelled by the use of two
|
- Algebraic data types: types that are modelled by the use of two
|
||||||
algebraic operations, sum and product.
|
algebraic operations, sum and product.
|
||||||
A sum type is a type that can hold of many different types of
|
A sum type is a type that can hold of many different types of
|
||||||
objects, but only one at a time. For example the sum type defined
|
objects, but only one at a time. For example the sum type defined
|
||||||
as /A + B/ can hold at any moment a value of type A or a value of
|
as /A + B/ can hold at any moment a value of type A or a value of
|
||||||
type B. Sum types are also called tagged union or variants.
|
type B. Sum types are also called tagged union or variants.
|
||||||
A product type is a type constructed as a direct product
|
A product type is a type constructed as a direct product
|
||||||
of multiple types and contains at any moment one instance for
|
of multiple types and contains at any moment one instance for
|
||||||
every type of its operands. Product types are also called tuples
|
every type of its operands. Product types are also called tuples
|
||||||
or records. Algebraic data types can be recursive
|
or records. Algebraic data types can be recursive
|
||||||
in their definition and can be combined.
|
in their definition and can be combined.
|
||||||
Moreover ML languages are functional, meaning that functions are
|
Moreover ML languages are functional, meaning that functions are
|
||||||
treated as first class citizens and variables are immutable,
|
treated as first class citizens and variables are immutable,
|
||||||
although mutable statements and imperative constructs are permitted.
|
although mutable statements and imperative constructs are permitted.
|
||||||
|
@ -104,7 +102,7 @@ inheritance, subtyping and dynamic binding, and modules, that
|
||||||
provide a way to encapsulate definitions. Modules are checked
|
provide a way to encapsulate definitions. Modules are checked
|
||||||
statically and can be reificated through functors.
|
statically and can be reificated through functors.
|
||||||
|
|
||||||
*** TODO 1.2 Pattern matching [37%]
|
*** TODO Pattern matching [37%]
|
||||||
- [ ] capisci come mettere gli esempi uno accanto all'altro
|
- [ ] capisci come mettere gli esempi uno accanto all'altro
|
||||||
|
|
||||||
Pattern matching is a widely adopted mechanism to interact with ADT.
|
Pattern matching is a widely adopted mechanism to interact with ADT.
|
||||||
|
@ -183,15 +181,15 @@ It can be described more formally through a BNF grammar.
|
||||||
- [ ] Da tabella a matrice
|
- [ ] Da tabella a matrice
|
||||||
|
|
||||||
Formally, pattern are defined as follows:
|
Formally, pattern are defined as follows:
|
||||||
| pattern ::= | Patterns |
|
| pattern | Patterns |
|
||||||
|----------------+---------------------|
|
|-----------------+---------------------|
|
||||||
| _ | wildcard |
|
| _ | wildcard |
|
||||||
| x | variable |
|
| x | variable |
|
||||||
| c(p₁,p₂,...,pₙ | constructor pattern |
|
| c(p₁,p₂,...,pₙ) | constructor pattern |
|
||||||
| (p₁\vert p₂) | or-pattern |
|
| (p₁\vert p₂) | or-pattern |
|
||||||
|
|
||||||
Values are defined as follows:
|
Values are defined as follows:
|
||||||
| values ::= | Values |
|
| values | Values |
|
||||||
|---------------------+-------------------|
|
|---------------------+-------------------|
|
||||||
| c(v₁, v₂, ..., vₙ) | constructor value |
|
| c(v₁, v₂, ..., vₙ) | constructor value |
|
||||||
|
|
||||||
|
@ -201,10 +199,10 @@ lambda code action lⁱ
|
||||||
\begin{equation*}
|
\begin{equation*}
|
||||||
(P → L) =
|
(P → L) =
|
||||||
\begin{pmatrix}
|
\begin{pmatrix}
|
||||||
p_{1,1} & p_{1,2} & \cdots & p_{1,n} → l₁ \\
|
p_{1,1} & p_{1,2} & \cdots & p_{1,n} & → l₁ \\
|
||||||
p_{2,1} & p_{2,2} & \cdots & p_{2,n} → l₂ \\
|
p_{2,1} & p_{2,2} & \cdots & p_{2,n} & → l₂ \\
|
||||||
\vdots & \vdots & \ddots \vdots → \vdots \\
|
\vdots & \vdots & \ddots & \vdots & → \vdots \\
|
||||||
p_{m,1} & p_{m,2} & \cdots & p_{m,n} → lₘ
|
p_{m,1} & p_{m,2} & \cdots & p_{m,n} & → lₘ
|
||||||
\end{pmatrix}
|
\end{pmatrix}
|
||||||
\end{equation*}
|
\end{equation*}
|
||||||
|
|
||||||
|
@ -219,24 +217,28 @@ signature.
|
||||||
Every constructor has an arity. Nil, a constructor of arity 0, is
|
Every constructor has an arity. Nil, a constructor of arity 0, is
|
||||||
called a constant constructor.
|
called a constant constructor.
|
||||||
|
|
||||||
The pattern /p/ matches a value /v/, written as p ≼ v, when
|
The pattern /p/ matches a value /v/, written as p ≼ v, when one of the
|
||||||
one of the following rules apply
|
following rules apply
|
||||||
|
|
||||||
| _ | ≼ | v | |
|
| | | | |
|
||||||
| x | ≼ | v | |
|
|--------------------+---+--------------------+-------------------------------------------|
|
||||||
|
| _ | ≼ | v | ∀v |
|
||||||
|
| x | ≼ | v | ∀v |
|
||||||
| (p₁ \vert\ p₂) | ≼ | v | iff p₁ ≼ v or p₂ ≼ v |
|
| (p₁ \vert\ p₂) | ≼ | v | iff p₁ ≼ v or p₂ ≼ v |
|
||||||
| c(p₁, p₂, ..., pₐ) | ≼ | c(v₁, v₂, ..., vₐ) | iff (p₁, p₂, ..., pₐ) ≼ (v₁, v₂, ..., vₐ) |
|
| c(p₁, p₂, ..., pₐ) | ≼ | c(v₁, v₂, ..., vₐ) | iff (p₁, p₂, ..., pₐ) ≼ (v₁, v₂, ..., vₐ) |
|
||||||
| (p₁, p₂, ..., pₐ) | ≼ | (v₁, v₂, ..., vₐ) | iff pᵢ ≼ vᵢ ∀i ∈ [1..a] |
|
| (p₁, p₂, ..., pₐ) | ≼ | (v₁, v₂, ..., vₐ) | iff pᵢ ≼ vᵢ ∀i ∈ [1..a] |
|
||||||
|
|--------------------+---+--------------------+-------------------------------------------|
|
||||||
|
|
||||||
We can also say that /v/ is an /instance/ of /p/.
|
We can also say that /v/ is an /instance/ of /p/.
|
||||||
|
|
||||||
When we consider the pattern matrix P we say that the value vector
|
When we consider the pattern matrix P we say that the value vector
|
||||||
\vv{v} = (v₁, v₂, ..., vᵢ) matches the line number i in P if and only if the following two
|
$\vec{v}$ = (v₁, v₂, ..., vᵢ) matches the line number i in P if and only if the following two
|
||||||
conditions are satisfied:
|
conditions are satisfied:
|
||||||
- \[ p_{i,1} & p_{i,2} & \cdots & p_{i,n} \] ≼ (v₁, v₂, ..., vᵢ)
|
- p_{i,1}, p_{i,2}, \cdots, p_{i,n} ≼ (v₁, v₂, ..., vᵢ)
|
||||||
- \[ ∀j < i p_{j,1} & p_{j,2} & \cdots & p_{j,n} \] ⋠ (v₁, v₂, ..., vᵢ)
|
- ∀j < i p_{j,1}, p_{j,2}, \cdots, p_{j,n} ⋠ (v₁, v₂, ..., vᵢ)
|
||||||
|
|
||||||
We can define the following three relations with respect to patterns:
|
We can define the following three relations with respect to patterns:
|
||||||
- Patter p is less precise than pattern q, writtens p ≼ q when all
|
- Patter p is less precise than pattern q, written p ≼ q, when all
|
||||||
instances of q are instances of p
|
instances of q are instances of p
|
||||||
- Pattern p and q are equivalent, written p ≡ q, when their instances
|
- Pattern p and q are equivalent, written p ≡ q, when their instances
|
||||||
are the same
|
are the same
|
||||||
|
@ -255,7 +257,7 @@ match x with
|
||||||
#+END_SRC ocaml
|
#+END_SRC ocaml
|
||||||
|
|
||||||
the initial input of the algorithm consists of a vector of variables
|
the initial input of the algorithm consists of a vector of variables
|
||||||
\vv{x} = (x₁, x₂, ..., xₙ) of size n
|
$\vec{x}$ = (x₁, x₂, ..., xₙ) of size n
|
||||||
and a clause matrix P → L of width n and height m.
|
and a clause matrix P → L of width n and height m.
|
||||||
|
|
||||||
\begin{equation*}
|
\begin{equation*}
|
||||||
|
|
358
tesi/tesi_unicode.tex
Normal file
358
tesi/tesi_unicode.tex
Normal file
|
@ -0,0 +1,358 @@
|
||||||
|
% Created 2020-02-24 Mon 14:35
|
||||||
|
% Intended LaTeX compiler: pdflatex
|
||||||
|
\documentclass[11pt]{article}
|
||||||
|
\usepackage[utf8]{inputenc}
|
||||||
|
\usepackage[T1]{fontenc}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage{grffile}
|
||||||
|
\usepackage{longtable}
|
||||||
|
\usepackage{wrapfig}
|
||||||
|
\usepackage{rotating}
|
||||||
|
\usepackage[normalem]{ulem}
|
||||||
|
\usepackage{amsmath}
|
||||||
|
\usepackage{textcomp}
|
||||||
|
\usepackage{amssymb}
|
||||||
|
\usepackage{capt-of}
|
||||||
|
\usepackage{hyperref}
|
||||||
|
\usepackage[utf8]{inputenc}
|
||||||
|
\usepackage{algorithm}
|
||||||
|
\usepackage{algpseudocode}
|
||||||
|
\usepackage{amsmath,amssymb,amsthm}
|
||||||
|
\newtheorem{definition}{Definition}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage{listings}
|
||||||
|
\usepackage{color}
|
||||||
|
\author{Francesco Mecca}
|
||||||
|
\date{}
|
||||||
|
\title{Translation Verification of the OCaml pattern matching compiler}
|
||||||
|
\hypersetup{
|
||||||
|
pdfauthor={Francesco Mecca},
|
||||||
|
pdftitle={Translation Verification of the OCaml pattern matching compiler},
|
||||||
|
pdfkeywords={},
|
||||||
|
pdfsubject={},
|
||||||
|
pdfcreator={Emacs 26.3 (Org mode 9.1.9)},
|
||||||
|
pdflang={English}}
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\maketitle
|
||||||
|
\section{{\bfseries\sffamily TODO} Scaletta [1/2]}
|
||||||
|
\label{sec:orgd539212}
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\boxtimes$}] Abstract
|
||||||
|
\item[{$\boxminus$}] Background [20\%]
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\boxtimes$}] Ocaml
|
||||||
|
\item[{$\square$}] Lambda code [0\%]
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\square$}] Untyped lambda form
|
||||||
|
\item[{$\square$}] OCaml specific instructions
|
||||||
|
\end{itemize}
|
||||||
|
\item[{$\boxminus$}] Pattern matching [50\%]
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\boxtimes$}] Introduzione
|
||||||
|
\item[{$\square$}] Compilation to lambda form
|
||||||
|
\end{itemize}
|
||||||
|
\item[{$\square$}] Translation Verification
|
||||||
|
\item[{$\square$}] Symbolic execution
|
||||||
|
\end{itemize}
|
||||||
|
\item[{$\square$}] Translation verification of the Pattern Matching Compiler
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\square$}] Source translation
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\square$}] Formal Grammar
|
||||||
|
\item[{$\square$}] Compilation of source patterns
|
||||||
|
\end{itemize}
|
||||||
|
\item[{$\square$}] Target translation
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\square$}] Formal Grammar
|
||||||
|
\item[{$\square$}] Symbolic execution of the lambda target
|
||||||
|
\end{itemize}
|
||||||
|
\item[{$\square$}] Equivalence between source and target
|
||||||
|
\end{itemize}
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\square$}] Practical results
|
||||||
|
\end{itemize}
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{abstract}
|
||||||
|
|
||||||
|
This dissertation presents an algorithm for the translation validation of the OCaml
|
||||||
|
pattern matching compiler. Given the source representation of the target program and the
|
||||||
|
target program compiled in untyped lambda form, the algoritmhm is capable of modelling
|
||||||
|
the source program in terms of symbolic constraints on it's branches and apply symbolic
|
||||||
|
execution on the untyped lambda representation in order to validate wheter the compilation
|
||||||
|
produced a valid result.
|
||||||
|
In this context a valid result means that for every input in the domain of the source
|
||||||
|
program the untyped lambda translation produces the same output as the source program.
|
||||||
|
The input of the program is modelled in terms of symbolic constraints closely related to
|
||||||
|
the runtime representation of OCaml objects and the output consists of OCaml code
|
||||||
|
blackboxes that are not evaluated in the context of the verification.
|
||||||
|
|
||||||
|
\end{abstract}
|
||||||
|
|
||||||
|
\section{Background}
|
||||||
|
\label{sec:org06597c8}
|
||||||
|
|
||||||
|
\subsection{OCaml}
|
||||||
|
\label{sec:org8d0180f}
|
||||||
|
Objective Caml (OCaml) is a dialect of the ML (Meta-Language) family of programming
|
||||||
|
languages.
|
||||||
|
OCaml shares many features with other dialects of ML, such as SML and Caml Light,
|
||||||
|
The main features of ML languages are the use of the Hindley-Milner type system that
|
||||||
|
provides many advantages with respect to static type systems of traditional imperative and object
|
||||||
|
oriented language such as C, C++ and Java, such as:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Parametric polymorphism: in certain scenarios a function can accept more than one
|
||||||
|
type for the input parameters. For example a function that computes the lenght of a
|
||||||
|
list doesn't need to inspect the type of the elements of the list and for this reason
|
||||||
|
a List.length function can accept list of integers, list of strings and in general
|
||||||
|
list of any type. Such languages offer polymorphic functions through subtyping at
|
||||||
|
runtime only, while other languages such as C++ offer polymorphism through compile
|
||||||
|
time templates and function overloading.
|
||||||
|
With the Hindley-Milner type system each well typed function can have more than one
|
||||||
|
type but always has a unique best type, called the \emph{principal type}.
|
||||||
|
For example the principal type of the List.length function is "For any \emph{a}, function from
|
||||||
|
list of \emph{a} to \emph{int}" and \emph{a} is called the \emph{type parameter}.
|
||||||
|
\item Strong typing: Languages such as C and C++ allow the programmer to operate on data
|
||||||
|
without considering its type, mainly through pointers. Other languages such as C\#
|
||||||
|
and Go allow type erasure so at runtime the type of the data can't be queried.
|
||||||
|
In the case of programming languages using an Hindley-Milner type system the
|
||||||
|
programmer is not allowed to operate on data by ignoring or promoting its type.
|
||||||
|
\item Type Inference: the principal type of a well formed term can be inferred without any
|
||||||
|
annotation or declaration.
|
||||||
|
\item Algebraic data types: types that are modelled by the use of two
|
||||||
|
algebraic operations, sum and product.
|
||||||
|
A sum type is a type that can hold of many different types of
|
||||||
|
objects, but only one at a time. For example the sum type defined
|
||||||
|
as \emph{A + B} can hold at any moment a value of type A or a value of
|
||||||
|
type B. Sum types are also called tagged union or variants.
|
||||||
|
A product type is a type constructed as a direct product
|
||||||
|
of multiple types and contains at any moment one instance for
|
||||||
|
every type of its operands. Product types are also called tuples
|
||||||
|
or records. Algebraic data types can be recursive
|
||||||
|
in their definition and can be combined.
|
||||||
|
\end{itemize}
|
||||||
|
Moreover ML languages are functional, meaning that functions are
|
||||||
|
treated as first class citizens and variables are immutable,
|
||||||
|
although mutable statements and imperative constructs are permitted.
|
||||||
|
In addition to that OCaml features an object system, that provides
|
||||||
|
inheritance, subtyping and dynamic binding, and modules, that
|
||||||
|
provide a way to encapsulate definitions. Modules are checked
|
||||||
|
statically and can be reificated through functors.
|
||||||
|
|
||||||
|
\begin{enumerate}
|
||||||
|
\item {\bfseries\sffamily TODO} Pattern matching [37\%]
|
||||||
|
\label{sec:orgd3cffc0}
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\square$}] capisci come mettere gli esempi uno accanto all'altro
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
Pattern matching is a widely adopted mechanism to interact with ADT.
|
||||||
|
C family languages provide branching on predicates through the use of
|
||||||
|
if statements and switch statements.
|
||||||
|
Pattern matching is a mechanism for destructuring and analyzing data
|
||||||
|
structures for the presence of values simbolically represented as
|
||||||
|
tokens. One common example of pattern matching is the use of regular
|
||||||
|
expressions on strings. OCaml provides pattern matching on ADT,
|
||||||
|
primitive data types.
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\boxtimes$}] Esempio enum, C e Ocaml
|
||||||
|
\end{itemize}
|
||||||
|
\begin{verbatim}
|
||||||
|
type color = | Red | Blue | Green
|
||||||
|
|
||||||
|
begin match color with
|
||||||
|
| Red -> print "red"
|
||||||
|
| Blue -> print "red"
|
||||||
|
| Green -> print "red"
|
||||||
|
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
OCaml provides tokens to express data destructoring
|
||||||
|
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\boxtimes$}] Esempio destructor list
|
||||||
|
\end{itemize}
|
||||||
|
\begin{verbatim}
|
||||||
|
|
||||||
|
begin match list with
|
||||||
|
| [ ] -> print "empty list"
|
||||||
|
| element1 :: [ ] -> print "one element"
|
||||||
|
| element1 :: element2 :: [ ] -> print "two elements"
|
||||||
|
| head :: tail-> print "head followed by many elements"
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\boxtimes$}] Esempio destructor tuples
|
||||||
|
\end{itemize}
|
||||||
|
\begin{verbatim}
|
||||||
|
|
||||||
|
begin match tuple with
|
||||||
|
| (Some _, Some _) -> print "Pair of optional types"
|
||||||
|
| (Some _, None) -> print "Pair of optional types, last null"
|
||||||
|
| (None, Some _) -> print "Pair of optional types, first null"
|
||||||
|
| (None, None) -> print "Pair of optional types, both null"
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
Pattern clauses can make the use of \emph{guards} to test predicates and
|
||||||
|
variables can be binded in scope.
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\square$}] Esempio binding e guards
|
||||||
|
\end{itemize}
|
||||||
|
\begin{verbatim}
|
||||||
|
|
||||||
|
begin match token_list with
|
||||||
|
| "switch"::var::"{"::rest ->
|
||||||
|
| "case"::":"::var::rest when is_int var ->
|
||||||
|
| "case"::":"::var::rest when is_string var ->
|
||||||
|
| "}"::[ ] -> stop ()
|
||||||
|
| "}"::rest -> error "syntax error: " rest
|
||||||
|
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\square$}] Un altro esempio con destructors e tutto i lresto
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
In general pattern matching on primitive and algebraic data types takes the
|
||||||
|
following form.
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\square$}] Esempio informale
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
It can be described more formally through a BNF grammar.
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\square$}] BNF
|
||||||
|
|
||||||
|
\item[{$\square$}] Come funziona il pattern matching?
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\item {\bfseries\sffamily TODO} 1.2.1 Pattern matching compilation to lambda code
|
||||||
|
\label{sec:orgc04d093}
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item[{$\square$}] Da tabella a matrice
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
Formally, pattern are defined as follows:
|
||||||
|
\begin{center}
|
||||||
|
\begin{tabular}{ll}
|
||||||
|
pattern & Patterns\\
|
||||||
|
\hline
|
||||||
|
\_ & wildcard\\
|
||||||
|
x & variable\\
|
||||||
|
c(p₁,p₂,\ldots{},pₙ) & constructor pattern\\
|
||||||
|
(p₁\(\vert{}\) p₂) & or-pattern\\
|
||||||
|
\end{tabular}
|
||||||
|
\end{center}
|
||||||
|
|
||||||
|
Values are defined as follows:
|
||||||
|
\begin{center}
|
||||||
|
\begin{tabular}{ll}
|
||||||
|
values & Values\\
|
||||||
|
\hline
|
||||||
|
c(v₁, v₂, \ldots{}, vₙ) & constructor value\\
|
||||||
|
\end{tabular}
|
||||||
|
\end{center}
|
||||||
|
|
||||||
|
The entire pattern matching code can be represented as a clause matrix
|
||||||
|
that associates rows of patterns (p\(_{\text{i,1}}\), p\(_{\text{i,2}}\), \ldots{}, p\(_{\text{i,n}}\)) to
|
||||||
|
lambda code action lⁱ
|
||||||
|
\begin{equation*}
|
||||||
|
(P → L) =
|
||||||
|
\begin{pmatrix}
|
||||||
|
p_{1,1} & p_{1,2} & \cdots & p_{1,n} & → l₁ \\
|
||||||
|
p_{2,1} & p_{2,2} & \cdots & p_{2,n} & → l₂ \\
|
||||||
|
\vdots & \vdots & \ddots & \vdots & → \vdots \\
|
||||||
|
p_{m,1} & p_{m,2} & \cdots & p_{m,n} & → lₘ
|
||||||
|
\end{pmatrix}
|
||||||
|
\end{equation*}
|
||||||
|
|
||||||
|
Most native data types in OCaml, such as integers, tuples, lists,
|
||||||
|
records, can be seen as instances of the following definition
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
type t = Nil | One of int | Cons of int * t
|
||||||
|
\end{verbatim}
|
||||||
|
that is a type \emph{t} with three constructors that define its complete
|
||||||
|
signature.
|
||||||
|
Every constructor has an arity. Nil, a constructor of arity 0, is
|
||||||
|
called a constant constructor.
|
||||||
|
|
||||||
|
The pattern \emph{p} matches a value \emph{v}, written as p ≼ v, when one of the
|
||||||
|
following rules apply
|
||||||
|
|
||||||
|
\begin{center}
|
||||||
|
\begin{tabular}{llll}
|
||||||
|
& & & \\
|
||||||
|
\hline
|
||||||
|
\_ & ≼ & v & ∀v\\
|
||||||
|
x & ≼ & v & ∀v\\
|
||||||
|
(p₁ \(\vert{}\)$\backslash$ p₂) & ≼ & v & iff p₁ ≼ v or p₂ ≼ v\\
|
||||||
|
c(p₁, p₂, \ldots{}, pₐ) & ≼ & c(v₁, v₂, \ldots{}, vₐ) & iff (p₁, p₂, \ldots{}, pₐ) ≼ (v₁, v₂, \ldots{}, vₐ)\\
|
||||||
|
(p₁, p₂, \ldots{}, pₐ) & ≼ & (v₁, v₂, \ldots{}, vₐ) & iff pᵢ ≼ vᵢ ∀i ∈ [1..a]\\
|
||||||
|
\hline
|
||||||
|
\end{tabular}
|
||||||
|
\end{center}
|
||||||
|
|
||||||
|
We can also say that \emph{v} is an \emph{instance} of \emph{p}.
|
||||||
|
|
||||||
|
When we consider the pattern matrix P we say that the value vector
|
||||||
|
\(\vec{v}\) = (v₁, v₂, \ldots{}, vᵢ) matches the line number i in P if and only if the following two
|
||||||
|
conditions are satisfied:
|
||||||
|
\begin{itemize}
|
||||||
|
\item p\(_{\text{i,1}}\), p\(_{\text{i,2}}\), \(\cdots{}\), p\(_{\text{i,n}}\) ≼ (v₁, v₂, \ldots{}, vᵢ)
|
||||||
|
\item ∀j < i p\(_{\text{j,1}}\), p\(_{\text{j,2}}\), \(\cdots{}\), p\(_{\text{j,n}}\) ⋠ (v₁, v₂, \ldots{}, vᵢ)
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
We can define the following three relations with respect to patterns:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Patter p is less precise than pattern q, written p ≼ q, when all
|
||||||
|
instances of q are instances of p
|
||||||
|
\item Pattern p and q are equivalent, written p ≡ q, when their instances
|
||||||
|
are the same
|
||||||
|
\item Patterns p and q are compatible when they share a common instance
|
||||||
|
\end{itemize}
|
||||||
|
\end{enumerate}
|
||||||
|
|
||||||
|
\subsection{1.2.1.1 Initial state of the compilation}
|
||||||
|
\label{sec:orgc758fe3}
|
||||||
|
|
||||||
|
Given a source of the following form:
|
||||||
|
|
||||||
|
\#+BEGIN\_SRC ocaml
|
||||||
|
match x with
|
||||||
|
\begin{center}
|
||||||
|
\begin{tabular}{l}
|
||||||
|
p₁ -> e₁\\
|
||||||
|
p₂ -> e₂\\
|
||||||
|
\end{tabular}
|
||||||
|
\end{center}
|
||||||
|
\ldots{}
|
||||||
|
\begin{center}
|
||||||
|
\begin{tabular}{l}
|
||||||
|
pₘ -> eₘ\\
|
||||||
|
\end{tabular}
|
||||||
|
\end{center}
|
||||||
|
\#+END\_SRC ocaml
|
||||||
|
|
||||||
|
the initial input of the algorithm consists of a vector of variables
|
||||||
|
\(\vec{x}\) = (x₁, x₂, \ldots{}, xₙ) of size n
|
||||||
|
and a clause matrix P → L of width n and height m.
|
||||||
|
|
||||||
|
\begin{equation*}
|
||||||
|
(P → L) =
|
||||||
|
\begin{pmatrix}
|
||||||
|
p_{1,1} & p_{1,2} & \cdots & p_{1,n} → l₁ \\
|
||||||
|
p_{2,1} & p_{2,2} & \cdots & p_{2,n} → l₂ \\
|
||||||
|
\vdots & \vdots & \ddots \vdots → \vdots \\
|
||||||
|
p_{m,1} & p_{m,2} & \cdots & p_{m,n} → lₘ
|
||||||
|
\end{pmatrix}
|
||||||
|
\end{equation*}
|
||||||
|
\end{document}
|
|
@ -521,7 +521,7 @@
|
||||||
"⋝":"\\eqgtr",
|
"⋝":"\\eqgtr",
|
||||||
"⋞":"\\curlyeqprec",
|
"⋞":"\\curlyeqprec",
|
||||||
"⋟":"\\curlyeqsucc",
|
"⋟":"\\curlyeqsucc",
|
||||||
"⋠":"\\npreccurlyeq",
|
"⋠":"\\npreceq",
|
||||||
"⋡":"\\nsucccurlyeq",
|
"⋡":"\\nsucccurlyeq",
|
||||||
"⋢":"\\nsqsubseteq",
|
"⋢":"\\nsqsubseteq",
|
||||||
"⋣":"\\nsqsupseteq",
|
"⋣":"\\nsqsupseteq",
|
||||||
|
|
Loading…
Reference in a new issue