svm exercises

This commit is contained in:
Francesco Mecca 2020-07-02 11:58:47 +02:00
parent 009ac7e338
commit 2540e7c3ee
3 changed files with 514 additions and 65 deletions

View file

@@ -19,7 +19,9 @@
"from scipy.optimize import fmin_bfgs\n",
"import numpy as np\n",
"from numpy.linalg import norm\n",
"from numpy.linalg import inv"
"from numpy.linalg import inv\n",
"from numpy import transpose, identity\n",
"from numpy import zeros"
]
},
{
@@ -31,32 +33,9 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 6.32000000e-03, 1.80000000e+01, 2.31000000e+00, ...,\n",
" 1.53000000e+01, 3.96900000e+02, 4.98000000e+00],\n",
" [ 2.73100000e-02, 0.00000000e+00, 7.07000000e+00, ...,\n",
" 1.78000000e+01, 3.96900000e+02, 9.14000000e+00],\n",
" [ 2.72900000e-02, 0.00000000e+00, 7.07000000e+00, ...,\n",
" 1.78000000e+01, 3.92830000e+02, 4.03000000e+00],\n",
" ..., \n",
" [ 6.07600000e-02, 0.00000000e+00, 1.19300000e+01, ...,\n",
" 2.10000000e+01, 3.96900000e+02, 5.64000000e+00],\n",
" [ 1.09590000e-01, 0.00000000e+00, 1.19300000e+01, ...,\n",
" 2.10000000e+01, 3.93450000e+02, 6.48000000e+00],\n",
" [ 4.74100000e-02, 0.00000000e+00, 1.19300000e+01, ...,\n",
" 2.10000000e+01, 3.96900000e+02, 7.88000000e+00]])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"boston = datasets.load_boston()\n",
"data = np.array(boston.data)"
@@ -71,24 +50,23 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Boston House Prices dataset\n",
".. _boston_dataset:\n",
"\n",
"Notes\n",
"------\n",
"Data Set Characteristics: \n",
"Boston house prices dataset\n",
"---------------------------\n",
"\n",
"**Data Set Characteristics:** \n",
"\n",
" :Number of Instances: 506 \n",
"\n",
" :Number of Attributes: 13 numeric/categorical predictive\n",
" \n",
" :Median Value (attribute 14) is usually the target\n",
" :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.\n",
"\n",
" :Attribute Information (in order):\n",
" - CRIM per capita crime rate by town\n",
@@ -111,7 +89,7 @@
" :Creator: Harrison, D. and Rubinfeld, D.L.\n",
"\n",
"This is a copy of UCI ML housing dataset.\n",
"http://archive.ics.uci.edu/ml/datasets/Housing\n",
"https://archive.ics.uci.edu/ml/machine-learning-databases/housing/\n",
"\n",
"\n",
"This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.\n",
@@ -125,11 +103,10 @@
"The Boston house-price data has been used in many machine learning papers that address regression\n",
"problems. \n",
" \n",
"**References**\n",
".. topic:: References\n",
"\n",
" - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.\n",
" - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.\n",
" - many more! (see http://archive.ics.uci.edu/ml/datasets/Housing)\n",
"\n"
]
}
@@ -147,10 +124,8 @@
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"t = np.ones(len(data)).reshape(len(data),1)\n",
@@ -167,10 +142,8 @@
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"X,y = data[0:400,:], target[0:400]\n",
@@ -223,12 +196,192 @@
" where $y'_i$ is your model prediction for the i-th example, and $n$ is the number of examples."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"least squares: $(X^T X)^{-1}X^T y $"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"least_squares = lambda x,y: inv(x.T.dot(x)).dot(x.T.dot(y))"
]
},
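{
"cell_type": "markdown",
"metadata": {},
"source": [
"Side note: forming the explicit inverse of XᵀX can be numerically unstable when the matrix is ill-conditioned. As a sketch of an alternative (not part of the original exercise), `np.linalg.lstsq` solves the same least-squares problem directly:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# same solution as least_squares(X, y), computed without forming the inverse\n",
"w_lstsq, *_ = np.linalg.lstsq(X, y, rcond=None)"
]
},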
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([-1.91246374e-01, 4.42289967e-02, 5.52207977e-02, 1.71631351e+00,\n",
" -1.49957220e+01, 4.88773025e+00, 2.60921031e-03, -1.29480799e+00,\n",
" 4.84787214e-01, -1.54006673e-02, -8.08795026e-01, -1.29230427e-03,\n",
" -5.17953791e-01, 2.86725996e+01])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"least_squares(X, y)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ridge regression: ŵ = (XᵀX + λI)⁻¹Xᵀy"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def ridge_regression(x, y, lmb):\n",
" I = identity(len(X[0]))\n",
" return inv(x.T.dot(x) + lmb * I).dot(x.T).dot(y)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([-1.92905668e-01, 4.45989360e-02, 4.80153773e-02, 1.70985336e+00,\n",
" -1.21920175e+01, 5.08501051e+00, 8.60369052e-04, -1.23267578e+00,\n",
" 4.67418151e-01, -1.51800832e-02, -7.48061272e-01, 7.58288257e-04,\n",
" -5.09848508e-01, 2.39216289e+01])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ridge_regression(X, y, 0.1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Lasso: w* = argmin_w (y-X·w)ᵀ(y-X·w) + λ‖w‖₁"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def lasso(w, x, y, lmb):\n",
" return (y - x.dot(w)).T.dot(y-x.dot(w)) + lmb * sum(w)\n",
"lasso_regression = lambda x, y, lmb: fmin_bfgs(lasso, zeros(len(x[0])), args= (x,y,lmb))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: Desired error not necessarily achieved due to precision loss.\n",
" Current function value: 8923.891671\n",
" Iterations: 18\n",
" Function evaluations: 1047\n",
" Gradient evaluations: 69\n"
]
},
{
"data": {
"text/plain": [
"array([-1.91323083e-01, 4.42279444e-02, 5.53631382e-02, 1.71490995e+00,\n",
" -1.50058027e+01, 4.89083693e+00, 2.62969748e-03, -1.29453598e+00,\n",
" 4.84604103e-01, -1.53869118e-02, -8.08349218e-01, -1.26958995e-03,\n",
" -5.17749228e-01, 2.86320548e+01])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lasso_regression(X,y,0.1)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def S(actual, predicted):\n",
" from math import sqrt\n",
" return sqrt(sum((predicted[i] - actual[i])**2 for i in range(len(actual))) / len(actual))\n",
"predicted = lambda x, w: x.dot(w)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: Desired error not necessarily achieved due to precision loss.\n",
" Current function value: 8922.270593\n",
" Iterations: 18\n",
" Function evaluations: 555\n",
" Gradient evaluations: 37\n",
"Least squares training set s statistics: 4.722840838326382\n",
"Least squares test set s statistics: 6.155792280412581\n",
"Ridge regression training set s statistics: 4.734160907532518\n",
"Ridge regression test set s statistics: 5.98737876633626\n",
"Lasso regression training set s statistics: 4.722840845344215\n",
"Lasso regression test set s statistics: 6.155671334655423\n"
]
}
],
"source": [
"w_least = least_squares(X, y)\n",
"w_ridge = ridge_regression(X, y, 0.01)\n",
"w_ridge = ridge_regression(X, y, 0.2)\n",
"w_lasso = lasso_regression(X, y, 0.01)\n",
"print(\"Least squares training set s statistics:\", S(y, predicted(X, w_least)))\n",
"print(\"Least squares test set s statistics:\", S(y_test, predicted(X_test, w_least)))\n",
"print(\"Ridge regression training set s statistics:\", S(y, predicted(X, w_ridge)))\n",
"print(\"Ridge regression test set s statistics:\", S(y_test, predicted(X_test, w_ridge)))\n",
"print(\"Lasso regression training set s statistics:\", S(y, predicted(X, w_lasso)))\n",
"print(\"Lasso regression test set s statistics:\", S(y_test, predicted(X_test, w_lasso)))"
]
},
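{
"cell_type": "markdown",
"metadata": {},
"source": [
"On this split, ridge regression attains the lowest test-set S while its training-set S is slightly higher than least squares: the regularization trades a little training fit for better generalization. Lasso with this small λ stays very close to the plain least-squares solution."
]
},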
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": []
}
@@ -250,7 +403,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
"version": "3.7.7"
}
},
"nbformat": 4,

File diff suppressed because one or more lines are too long

View file

@@ -122,12 +122,13 @@ invertible
We can frame this problem as minimizing the norm of e, where
p = X·ŵ. The whole problem consists of:
| $minimize_{\hat{w}}\Vert X \hat{w} - y \Vert_2^2$
| minimize_ŵ ‖Xŵ-y‖²₂
The solution consists in imposing orthogonality between e and C(X),
i.e. Xᵀ·e = 0; hence:
| Xᵀ·e = 0; e = y-X·ŵ
| Xᵀ(y-X·ŵ) = 0
| Xᵀy = XᵀXŵ
| ŵ = (XᵀX)⁻¹Xᵀy
| ŵ = (XᵀX)⁻¹Xᵀy (LSE)
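A minimal numpy sketch of the LSE above (illustrative only; X and y are
assumed to be an already-loaded design matrix and target vector):
#+BEGIN_SRC python
import numpy as np
# normal equations XᵀXŵ = Xᵀy, solved without forming an explicit inverse
w_hat = np.linalg.solve(X.T @ X, X.T @ y)
#+END_SRC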
**** Regularization
Avoid overfitting by applying constraints to the weight vector.
Generally the weights are small on average: ~shrinkage~.
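For ridge regression the constraint enters the objective as an L2
penalty; in the same notation, the objective and the closed form used
in the notebook above are:
| w* = argmin_w (y-X·w)ᵀ(y-X·w) + λ‖w‖₂²
| ŵ = (XᵀX + λI)⁻¹Xᵀy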
@@ -140,7 +141,7 @@ XᵀX to improve the numerical stability of the inversion
~Lasso~ (least absolute shrinkage and selection operator) can also be
used when sparse solutions are desired; it replaces ‖w‖₂ with
‖w‖₁ = ∑|wᵢ|
| w* = argmin_w (y-X·w)ᵀ(y-X·w) + λ‖w‖1
| w* = argmin_w (y-X·w)ᵀ(y-X·w) + λ‖w‖₁
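A minimal numeric sketch of this objective, mirroring the notebook's
~fmin_bfgs~ approach (X, y and lmb are assumed to be already defined):
#+BEGIN_SRC python
import numpy as np
from scipy.optimize import fmin_bfgs

def lasso_objective(w, X, y, lmb):
    # (y - Xw)ᵀ(y - Xw) + λ‖w‖₁
    r = y - X.dot(w)
    return r.dot(r) + lmb * np.abs(w).sum()

# start from w = 0; BFGS may warn since ‖w‖₁ is not smooth at 0
w_star = fmin_bfgs(lasso_objective, np.zeros(X.shape[1]), args=(X, y, lmb))
#+END_SRC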
Minimizing the norm amounts to imagining that X is affected by an
error D and minimizing that error:
| (X+D)w = Xw + Dw
@@ -335,7 +336,7 @@ never seen.
It makes it possible to turn an inductive system into a deductive one
** TODO Path Through hyp. space
See what they want to know
** TODO Trees
** Trees
** Rules
Ordered rules are a chain of /if-then-else/.
#+BEGIN_SRC