esercizi svm

This commit is contained in:
Francesco Mecca 2020-07-02 11:58:47 +02:00
parent 009ac7e338
commit 2540e7c3ee
3 changed files with 514 additions and 65 deletions

View file

@ -19,7 +19,9 @@
"from scipy.optimize import fmin_bfgs\n", "from scipy.optimize import fmin_bfgs\n",
"import numpy as np\n", "import numpy as np\n",
"from numpy.linalg import norm\n", "from numpy.linalg import norm\n",
"from numpy.linalg import inv" "from numpy.linalg import inv\n",
"from numpy import transpose, identity\n",
"from numpy import zeros"
] ]
}, },
{ {
@ -31,32 +33,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"data": {
"text/plain": [
"array([[ 6.32000000e-03, 1.80000000e+01, 2.31000000e+00, ...,\n",
" 1.53000000e+01, 3.96900000e+02, 4.98000000e+00],\n",
" [ 2.73100000e-02, 0.00000000e+00, 7.07000000e+00, ...,\n",
" 1.78000000e+01, 3.96900000e+02, 9.14000000e+00],\n",
" [ 2.72900000e-02, 0.00000000e+00, 7.07000000e+00, ...,\n",
" 1.78000000e+01, 3.92830000e+02, 4.03000000e+00],\n",
" ..., \n",
" [ 6.07600000e-02, 0.00000000e+00, 1.19300000e+01, ...,\n",
" 2.10000000e+01, 3.96900000e+02, 5.64000000e+00],\n",
" [ 1.09590000e-01, 0.00000000e+00, 1.19300000e+01, ...,\n",
" 2.10000000e+01, 3.93450000e+02, 6.48000000e+00],\n",
" [ 4.74100000e-02, 0.00000000e+00, 1.19300000e+01, ...,\n",
" 2.10000000e+01, 3.96900000e+02, 7.88000000e+00]])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"boston = datasets.load_boston()\n", "boston = datasets.load_boston()\n",
"data = np.array(boston.data)" "data = np.array(boston.data)"
@ -71,24 +50,23 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Boston House Prices dataset\n", ".. _boston_dataset:\n",
"\n", "\n",
"Notes\n", "Boston house prices dataset\n",
"------\n", "---------------------------\n",
"Data Set Characteristics: \n", "\n",
"**Data Set Characteristics:** \n",
"\n", "\n",
" :Number of Instances: 506 \n", " :Number of Instances: 506 \n",
"\n", "\n",
" :Number of Attributes: 13 numeric/categorical predictive\n", " :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.\n",
" \n",
" :Median Value (attribute 14) is usually the target\n",
"\n", "\n",
" :Attribute Information (in order):\n", " :Attribute Information (in order):\n",
" - CRIM per capita crime rate by town\n", " - CRIM per capita crime rate by town\n",
@ -111,7 +89,7 @@
" :Creator: Harrison, D. and Rubinfeld, D.L.\n", " :Creator: Harrison, D. and Rubinfeld, D.L.\n",
"\n", "\n",
"This is a copy of UCI ML housing dataset.\n", "This is a copy of UCI ML housing dataset.\n",
"http://archive.ics.uci.edu/ml/datasets/Housing\n", "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/\n",
"\n", "\n",
"\n", "\n",
"This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.\n", "This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.\n",
@ -125,11 +103,10 @@
"The Boston house-price data has been used in many machine learning papers that address regression\n", "The Boston house-price data has been used in many machine learning papers that address regression\n",
"problems. \n", "problems. \n",
" \n", " \n",
"**References**\n", ".. topic:: References\n",
"\n", "\n",
" - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.\n", " - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.\n",
" - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.\n", " - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.\n",
" - many more! (see http://archive.ics.uci.edu/ml/datasets/Housing)\n",
"\n" "\n"
] ]
} }
@ -147,10 +124,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 4,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"t = np.ones(len(data)).reshape(len(data),1)\n", "t = np.ones(len(data)).reshape(len(data),1)\n",
@ -167,10 +142,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 5,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"X,y = data[0:400,:], target[0:400]\n", "X,y = data[0:400,:], target[0:400]\n",
@ -223,12 +196,192 @@
" where $y'_i$ is your model prediction for the i-th example, and $n$ is the number of examples." " where $y'_i$ is your model prediction for the i-th example, and $n$ is the number of examples."
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Least squares (normal equations): $\\hat{w} = (X^T X)^{-1} X^T y$"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def least_squares(x, y):\n",
"    \"\"\"Least-squares weights from the normal equations: inv(x^T x) applied to x^T y.\"\"\"\n",
"    gram = x.T.dot(x)\n",
"    moment = x.T.dot(y)\n",
"    return inv(gram).dot(moment)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([-1.91246374e-01, 4.42289967e-02, 5.52207977e-02, 1.71631351e+00,\n",
" -1.49957220e+01, 4.88773025e+00, 2.60921031e-03, -1.29480799e+00,\n",
" 4.84787214e-01, -1.54006673e-02, -8.08795026e-01, -1.29230427e-03,\n",
" -5.17953791e-01, 2.86725996e+01])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"least_squares(X, y)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ridge regression: ŵ = (XᵀX + λI)⁻¹Xᵀy"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def ridge_regression(x, y, lmb):\n",
"    \"\"\"Ridge-regression weights: inv(x^T x + lmb * I) applied to x^T y.\n",
"\n",
"    x is the 2-D design matrix, y the targets, lmb the regularisation strength.\n",
"    \"\"\"\n",
"    # Size the identity from the argument `x`, not from the notebook-global `X`,\n",
"    # so the function works for any design matrix it is given.\n",
"    I = identity(x.shape[1])\n",
"    return inv(x.T.dot(x) + lmb * I).dot(x.T).dot(y)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([-1.92905668e-01, 4.45989360e-02, 4.80153773e-02, 1.70985336e+00,\n",
" -1.21920175e+01, 5.08501051e+00, 8.60369052e-04, -1.23267578e+00,\n",
" 4.67418151e-01, -1.51800832e-02, -7.48061272e-01, 7.58288257e-04,\n",
" -5.09848508e-01, 2.39216289e+01])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ridge_regression(X, y, 0.1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Lasso: w* = argmin_w (y-X·w)ᵀ(y-X·w) + λ‖w‖₁"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def lasso(w, x, y, lmb):\n",
"    \"\"\"Lasso objective: squared residual of x.dot(w) against y plus lmb times the L1 norm of w.\"\"\"\n",
"    residual = y - x.dot(w)\n",
"    # The L1 norm is sum(|w_i|); a plain sum(w) would reward negative weights.\n",
"    return residual.T.dot(residual) + lmb * sum(abs(w))\n",
"\n",
"\n",
"def lasso_regression(x, y, lmb):\n",
"    \"\"\"Minimise the lasso objective with BFGS, starting from the zero vector.\"\"\"\n",
"    return fmin_bfgs(lasso, zeros(len(x[0])), args=(x, y, lmb))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: Desired error not necessarily achieved due to precision loss.\n",
" Current function value: 8923.891671\n",
" Iterations: 18\n",
" Function evaluations: 1047\n",
" Gradient evaluations: 69\n"
]
},
{
"data": {
"text/plain": [
"array([-1.91323083e-01, 4.42279444e-02, 5.53631382e-02, 1.71490995e+00,\n",
" -1.50058027e+01, 4.89083693e+00, 2.62969748e-03, -1.29453598e+00,\n",
" 4.84604103e-01, -1.53869118e-02, -8.08349218e-01, -1.26958995e-03,\n",
" -5.17749228e-01, 2.86320548e+01])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lasso_regression(X,y,0.1)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def S(actual, predicted):\n",
"    \"\"\"Root-mean-square error between the targets `actual` and the model output `predicted`.\"\"\"\n",
"    # Vectorised with numpy rather than a per-element Python loop and a function-local import.\n",
"    errors = np.asarray(predicted, dtype=float) - np.asarray(actual, dtype=float)\n",
"    return float(np.sqrt(np.mean(errors ** 2)))\n",
"\n",
"\n",
"def predicted(x, w):\n",
"    \"\"\"Linear-model predictions: x.dot(w).\"\"\"\n",
"    return x.dot(w)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: Desired error not necessarily achieved due to precision loss.\n",
" Current function value: 8922.270593\n",
" Iterations: 18\n",
" Function evaluations: 555\n",
" Gradient evaluations: 37\n",
"Least squares training set s statistics: 4.722840838326382\n",
"Least squares test set s statistics: 6.155792280412581\n",
"Ridge regression training set s statistics: 4.734160907532518\n",
"Ridge regression test set s statistics: 5.98737876633626\n",
"Lasso regression training set s statistics: 4.722840845344215\n",
"Lasso regression test set s statistics: 6.155671334655423\n"
]
}
],
"source": [
"# Fit each model once on the training split; ridge uses lambda = 0.2\n",
"# (an earlier assignment with 0.01 was immediately overwritten and has been dropped).\n",
"w_least = least_squares(X, y)\n",
"w_ridge = ridge_regression(X, y, 0.2)\n",
"w_lasso = lasso_regression(X, y, 0.01)\n",
"\n",
"# Report the S statistic on the training and test splits for every model.\n",
"for label, w in ((\"Least squares\", w_least), (\"Ridge regression\", w_ridge), (\"Lasso regression\", w_lasso)):\n",
"    print(label, \"training set s statistics:\", S(y, predicted(X, w)))\n",
"    print(label, \"test set s statistics:\", S(y_test, predicted(X_test, w)))"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [] "source": []
} }
@ -250,7 +403,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.7.5" "version": "3.7.7"
} }
}, },
"nbformat": 4, "nbformat": 4,

File diff suppressed because one or more lines are too long

View file

@ -122,12 +122,13 @@ invertibile
Possiamo inquadrare questo problema come un problema di minimizzazione Possiamo inquadrare questo problema come un problema di minimizzazione
della norma di e. p = X·$\hat{w}$: L'intero problema consiste in: della norma di e. p = X·$\hat{w}$: L'intero problema consiste in:
| $minimize_{\hat{w}}\Vert X \hat{w} - y \Vert_2^2$ | $minimize_{\hat{w}}\Vert X \hat{w} - y \Vert_2^2$
| minimize_ŵ ‖Xŵ-y‖²₂
La soluzione consiste nell'imporre l'ortogonalita` di e e C(X), ovvero La soluzione consiste nell'imporre l'ortogonalita` di e e C(X), ovvero
Xᵀ·e=0; quindi: Xᵀ·e=0; quindi:
| Xᵀ·e = 0; e = y-X·ŵ | Xᵀ·e = 0; e = y-X·ŵ
| Xᵀ(y-X·ŵ) = 0 | Xᵀ(y-X·ŵ) = 0
| Xᵀy = XᵀXŵ | Xᵀy = XᵀXŵ
| ŵ = (XᵀX)⁻¹Xᵀy | ŵ = (XᵀX)⁻¹Xᵀy (LSE)
**** Regularization **** Regularization
evitare l'overfitting applicando dei constraint sul weight vector. evitare l'overfitting applicando dei constraint sul weight vector.
Generalmente i pesi sono in media piccoli: ~shrinkage~. Generalmente i pesi sono in media piccoli: ~shrinkage~.
@ -140,7 +141,7 @@ XᵀX per migliorare la stabilita` numerica dell'inversione
Si puo` anche usare ~lasso~ nel caso di soluzioni sparse Si puo` anche usare ~lasso~ nel caso di soluzioni sparse
(least absolute shrinkage and selection operator) (least absolute shrinkage and selection operator)
che sostituisce ‖w‖₂ con ‖w‖₁=∑|wᵢ| che sostituisce ‖w‖₂ con ‖w‖₁=∑|wᵢ|
| w* = argmin_w (y-X·w)ᵀ(y-X·w) + λ‖w‖1 | w* = argmin_w (y-X·w)ᵀ(y-X·w) + λ‖w‖₁
Minimizzare la norma significa immaginare che X sia affetto da errore Minimizzare la norma significa immaginare che X sia affetto da errore
D e minimizzare l'errore: D e minimizzare l'errore:
| (X+D)w = Xw + Dw | (X+D)w = Xw + Dw
@ -335,7 +336,7 @@ mai viste.
Permette di trasformare un sistema induttivo in deduttivo Permette di trasformare un sistema induttivo in deduttivo
** TODO Path Through hyp. space ** TODO Path Through hyp. space
Vedi che vuole sapere Vedi che vuole sapere
** TODO Trees ** Trees
** Rules ** Rules
Ordered rules are a chain of /if-then-else/. Ordered rules are a chain of /if-then-else/.
#+BEGIN_SRC #+BEGIN_SRC