esercizi svm
This commit is contained in:
parent
009ac7e338
commit
2540e7c3ee
3 changed files with 514 additions and 65 deletions
|
@ -19,7 +19,9 @@
|
||||||
"from scipy.optimize import fmin_bfgs\n",
|
"from scipy.optimize import fmin_bfgs\n",
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"from numpy.linalg import norm\n",
|
"from numpy.linalg import norm\n",
|
||||||
"from numpy.linalg import inv"
|
"from numpy.linalg import inv\n",
|
||||||
|
"from numpy import transpose, identity\n",
|
||||||
|
"from numpy import zeros"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -31,32 +33,9 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"array([[ 6.32000000e-03, 1.80000000e+01, 2.31000000e+00, ...,\n",
|
|
||||||
" 1.53000000e+01, 3.96900000e+02, 4.98000000e+00],\n",
|
|
||||||
" [ 2.73100000e-02, 0.00000000e+00, 7.07000000e+00, ...,\n",
|
|
||||||
" 1.78000000e+01, 3.96900000e+02, 9.14000000e+00],\n",
|
|
||||||
" [ 2.72900000e-02, 0.00000000e+00, 7.07000000e+00, ...,\n",
|
|
||||||
" 1.78000000e+01, 3.92830000e+02, 4.03000000e+00],\n",
|
|
||||||
" ..., \n",
|
|
||||||
" [ 6.07600000e-02, 0.00000000e+00, 1.19300000e+01, ...,\n",
|
|
||||||
" 2.10000000e+01, 3.96900000e+02, 5.64000000e+00],\n",
|
|
||||||
" [ 1.09590000e-01, 0.00000000e+00, 1.19300000e+01, ...,\n",
|
|
||||||
" 2.10000000e+01, 3.93450000e+02, 6.48000000e+00],\n",
|
|
||||||
" [ 4.74100000e-02, 0.00000000e+00, 1.19300000e+01, ...,\n",
|
|
||||||
" 2.10000000e+01, 3.96900000e+02, 7.88000000e+00]])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"boston = datasets.load_boston()\n",
|
"boston = datasets.load_boston()\n",
|
||||||
"data = np.array(boston.data)"
|
"data = np.array(boston.data)"
|
||||||
|
@ -71,24 +50,23 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Boston House Prices dataset\n",
|
".. _boston_dataset:\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Notes\n",
|
"Boston house prices dataset\n",
|
||||||
"------\n",
|
"---------------------------\n",
|
||||||
"Data Set Characteristics: \n",
|
"\n",
|
||||||
|
"**Data Set Characteristics:** \n",
|
||||||
"\n",
|
"\n",
|
||||||
" :Number of Instances: 506 \n",
|
" :Number of Instances: 506 \n",
|
||||||
"\n",
|
"\n",
|
||||||
" :Number of Attributes: 13 numeric/categorical predictive\n",
|
" :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.\n",
|
||||||
" \n",
|
|
||||||
" :Median Value (attribute 14) is usually the target\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" :Attribute Information (in order):\n",
|
" :Attribute Information (in order):\n",
|
||||||
" - CRIM per capita crime rate by town\n",
|
" - CRIM per capita crime rate by town\n",
|
||||||
|
@ -111,7 +89,7 @@
|
||||||
" :Creator: Harrison, D. and Rubinfeld, D.L.\n",
|
" :Creator: Harrison, D. and Rubinfeld, D.L.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This is a copy of UCI ML housing dataset.\n",
|
"This is a copy of UCI ML housing dataset.\n",
|
||||||
"http://archive.ics.uci.edu/ml/datasets/Housing\n",
|
"https://archive.ics.uci.edu/ml/machine-learning-databases/housing/\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.\n",
|
"This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.\n",
|
||||||
|
@ -125,11 +103,10 @@
|
||||||
"The Boston house-price data has been used in many machine learning papers that address regression\n",
|
"The Boston house-price data has been used in many machine learning papers that address regression\n",
|
||||||
"problems. \n",
|
"problems. \n",
|
||||||
" \n",
|
" \n",
|
||||||
"**References**\n",
|
".. topic:: References\n",
|
||||||
"\n",
|
"\n",
|
||||||
" - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.\n",
|
" - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.\n",
|
||||||
" - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.\n",
|
" - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.\n",
|
||||||
" - many more! (see http://archive.ics.uci.edu/ml/datasets/Housing)\n",
|
|
||||||
"\n"
|
"\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -147,10 +124,8 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 4,
|
||||||
"metadata": {
|
"metadata": {},
|
||||||
"collapsed": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"t = np.ones(len(data)).reshape(len(data),1)\n",
|
"t = np.ones(len(data)).reshape(len(data),1)\n",
|
||||||
|
@ -167,10 +142,8 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": 5,
|
||||||
"metadata": {
|
"metadata": {},
|
||||||
"collapsed": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"X,y = data[0:400,:], target[0:400]\n",
|
"X,y = data[0:400,:], target[0:400]\n",
|
||||||
|
@ -223,12 +196,192 @@
|
||||||
" where $y'_i$ is your model prediction for the i-th example, and $n$ is the number of examples."
|
" where $y'_i$ is your model prediction for the i-th example, and $n$ is the number of examples."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Least squares: $\\hat{w} = (X^T X)^{-1} X^T y$"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"least_squares = lambda x,y: inv(x.T.dot(x)).dot(x.T.dot(y))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"array([-1.91246374e-01, 4.42289967e-02, 5.52207977e-02, 1.71631351e+00,\n",
|
||||||
|
" -1.49957220e+01, 4.88773025e+00, 2.60921031e-03, -1.29480799e+00,\n",
|
||||||
|
" 4.84787214e-01, -1.54006673e-02, -8.08795026e-01, -1.29230427e-03,\n",
|
||||||
|
" -5.17953791e-01, 2.86725996e+01])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"least_squares(X, y)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Ridge regression: ŵ = (XᵀX + λI)⁻¹Xᵀy"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def ridge_regression(x, y, lmb):\n",
|
||||||
|
" I = identity(len(x[0]))\n",
|
||||||
|
" return inv(x.T.dot(x) + lmb * I).dot(x.T).dot(y)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"array([-1.92905668e-01, 4.45989360e-02, 4.80153773e-02, 1.70985336e+00,\n",
|
||||||
|
" -1.21920175e+01, 5.08501051e+00, 8.60369052e-04, -1.23267578e+00,\n",
|
||||||
|
" 4.67418151e-01, -1.51800832e-02, -7.48061272e-01, 7.58288257e-04,\n",
|
||||||
|
" -5.09848508e-01, 2.39216289e+01])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"ridge_regression(X, y, 0.1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Lasso: w* = argmin_w (y-X·w)ᵀ(y-X·w) + λ‖w‖₁"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def lasso(w, x, y, lmb):\n",
|
||||||
|
" return (y - x.dot(w)).T.dot(y-x.dot(w)) + lmb * sum(w)\n",
|
||||||
|
"lasso_regression = lambda x, y, lmb: fmin_bfgs(lasso, zeros(len(x[0])), args= (x,y,lmb))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Warning: Desired error not necessarily achieved due to precision loss.\n",
|
||||||
|
" Current function value: 8923.891671\n",
|
||||||
|
" Iterations: 18\n",
|
||||||
|
" Function evaluations: 1047\n",
|
||||||
|
" Gradient evaluations: 69\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"array([-1.91323083e-01, 4.42279444e-02, 5.53631382e-02, 1.71490995e+00,\n",
|
||||||
|
" -1.50058027e+01, 4.89083693e+00, 2.62969748e-03, -1.29453598e+00,\n",
|
||||||
|
" 4.84604103e-01, -1.53869118e-02, -8.08349218e-01, -1.26958995e-03,\n",
|
||||||
|
" -5.17749228e-01, 2.86320548e+01])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"lasso_regression(X,y,0.1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def S(actual, predicted):\n",
|
||||||
|
" from math import sqrt\n",
|
||||||
|
" return sqrt(sum((predicted[i] - actual[i])**2 for i in range(len(actual))) / len(actual))\n",
|
||||||
|
"predicted = lambda x, w: x.dot(w)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 21,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Warning: Desired error not necessarily achieved due to precision loss.\n",
|
||||||
|
" Current function value: 8922.270593\n",
|
||||||
|
" Iterations: 18\n",
|
||||||
|
" Function evaluations: 555\n",
|
||||||
|
" Gradient evaluations: 37\n",
|
||||||
|
"Least squares training set s statistics: 4.722840838326382\n",
|
||||||
|
"Least squares test set s statistics: 6.155792280412581\n",
|
||||||
|
"Ridge regression training set s statistics: 4.734160907532518\n",
|
||||||
|
"Ridge regression test set s statistics: 5.98737876633626\n",
|
||||||
|
"Lasso regression training set s statistics: 4.722840845344215\n",
|
||||||
|
"Lasso regression test set s statistics: 6.155671334655423\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"w_least = least_squares(X, y)\n",
|
||||||
|
"w_ridge = ridge_regression(X, y, 0.01)\n",
|
||||||
|
"w_ridge = ridge_regression(X, y, 0.2)\n",
|
||||||
|
"w_lasso = lasso_regression(X, y, 0.01)\n",
|
||||||
|
"print(\"Least squares training set s statistics:\", S(y, predicted(X, w_least)))\n",
|
||||||
|
"print(\"Least squares test set s statistics:\", S(y_test, predicted(X_test, w_least)))\n",
|
||||||
|
"print(\"Ridge regression training set s statistics:\", S(y, predicted(X, w_ridge)))\n",
|
||||||
|
"print(\"Ridge regression test set s statistics:\", S(y_test, predicted(X_test, w_ridge)))\n",
|
||||||
|
"print(\"Lasso regression training set s statistics:\", S(y, predicted(X, w_lasso)))\n",
|
||||||
|
"print(\"Lasso regression test set s statistics:\", S(y_test, predicted(X_test, w_lasso)))"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {},
|
||||||
"collapsed": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": []
|
"source": []
|
||||||
}
|
}
|
||||||
|
@ -250,7 +403,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.7.5"
|
"version": "3.7.7"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -122,12 +122,13 @@ invertibile
|
||||||
Possiamo inquadrare questo problema come un problema di minimizzazione
|
Possiamo inquadrare questo problema come un problema di minimizzazione
|
||||||
della norma di e. p = X·$\hat{w}$: L'intero problema consiste in:
|
della norma di e. p = X·$\hat{w}$: L'intero problema consiste in:
|
||||||
| $minimize_{\hat{w}}\Vert X \hat{w} - y \Vert_2^2$
|
| $minimize_{\hat{w}}\Vert X \hat{w} - y \Vert_2^2$
|
||||||
|
| minimize_ŵ ‖Xŵ-y‖²₂
|
||||||
La soluzione consiste nell'imporre l'ortogonalita` di e e C(X), ovvero
|
La soluzione consiste nell'imporre l'ortogonalita` di e e C(X), ovvero
|
||||||
Xᵀ·e=0; quindi:
|
Xᵀ·e=0; quindi:
|
||||||
| Xᵀ·e = 0; e = y-X·ŵ
|
| Xᵀ·e = 0; e = y-X·ŵ
|
||||||
| Xᵀ(y-X·ŵ) = 0
|
| Xᵀ(y-X·ŵ) = 0
|
||||||
| Xᵀy = XᵀXŵ
|
| Xᵀy = XᵀXŵ
|
||||||
| ŵ = (XᵀX)⁻¹Xᵀy
|
| ŵ = (XᵀX)⁻¹Xᵀy (LSE)
|
||||||
**** Regularization
|
**** Regularization
|
||||||
evitare l'overfitting applicando dei constraint sul weight vector.
|
evitare l'overfitting applicando dei constraint sul weight vector.
|
||||||
Generalmente i pesi sono in media piccoli: ~shrinkage~.
|
Generalmente i pesi sono in media piccoli: ~shrinkage~.
|
||||||
|
@ -140,7 +141,7 @@ XᵀX per migliorare la stabilita` numerica dell'inversione
|
||||||
Si puo` anche usare ~lasso~ nel caso di soluzioni sparse
|
Si puo` anche usare ~lasso~ nel caso di soluzioni sparse
|
||||||
(least absolute shrinkage and selection operator)
|
(least absolute shrinkage and selection operator)
|
||||||
che sostituisce ‖w‖₂ con ‖w‖₁=∑|wᵢ|
|
che sostituisce ‖w‖₂ con ‖w‖₁=∑|wᵢ|
|
||||||
| w* = argmin_w (y-X·w)ᵀ(y-X·w) + λ‖w‖1
|
| w* = argmin_w (y-X·w)ᵀ(y-X·w) + λ‖w‖₁
|
||||||
Minimizzare la norma significa immaginare che X sia affetto da errore
|
Minimizzare la norma significa immaginare che X sia affetto da errore
|
||||||
D e minimizzare l'errore:
|
D e minimizzare l'errore:
|
||||||
| (X+D)w = Xw + Dw
|
| (X+D)w = Xw + Dw
|
||||||
|
@ -335,7 +336,7 @@ mai viste.
|
||||||
Permette di trasformare un sistema induttivo in deduttivo
|
Permette di trasformare un sistema induttivo in deduttivo
|
||||||
** TODO Path Through hyp. space
|
** TODO Path Through hyp. space
|
||||||
Vedi che vuole sapere
|
Vedi che vuole sapere
|
||||||
** TODO Trees
|
** Trees
|
||||||
** Rules
|
** Rules
|
||||||
Ordered rules are a chain of /if-then-else/.
|
Ordered rules are a chain of /if-then-else/.
|
||||||
#+BEGIN_SRC
|
#+BEGIN_SRC
|
||||||
|
|
Loading…
Reference in a new issue