esercizi svm

2020-07-02 11:58:47 +02:00 · 2020-07-02 11:58:47 +02:00 · 2540e7c3ee
commit 2540e7c3ee
parent 009ac7e338
3 changed files with 514 additions and 65 deletions
--- a/anno3/apprendimento_automatico/esercizi/4/least_squares.ipynb
+++ b/anno3/apprendimento_automatico/esercizi/4/least_squares.ipynb
@ -19,7 +19,9 @@
    "from scipy.optimize import fmin_bfgs\n",
    "import numpy as np\n",
    "from numpy.linalg import norm\n",
-    "from numpy.linalg import inv"
+    "from numpy.linalg import inv\n",
    "from numpy import transpose, identity\n",
    "from numpy import zeros"
   ]
  },
  {
@ -31,32 +33,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
   "metadata": {},
-   "outputs": [
+   "outputs": [],
    {
     "data": {
      "text/plain": [
       "array([[  6.32000000e-03,   1.80000000e+01,   2.31000000e+00, ...,\n",
       "          1.53000000e+01,   3.96900000e+02,   4.98000000e+00],\n",
       "       [  2.73100000e-02,   0.00000000e+00,   7.07000000e+00, ...,\n",
       "          1.78000000e+01,   3.96900000e+02,   9.14000000e+00],\n",
       "       [  2.72900000e-02,   0.00000000e+00,   7.07000000e+00, ...,\n",
       "          1.78000000e+01,   3.92830000e+02,   4.03000000e+00],\n",
       "       ..., \n",
       "       [  6.07600000e-02,   0.00000000e+00,   1.19300000e+01, ...,\n",
       "          2.10000000e+01,   3.96900000e+02,   5.64000000e+00],\n",
       "       [  1.09590000e-01,   0.00000000e+00,   1.19300000e+01, ...,\n",
       "          2.10000000e+01,   3.93450000e+02,   6.48000000e+00],\n",
       "       [  4.74100000e-02,   0.00000000e+00,   1.19300000e+01, ...,\n",
       "          2.10000000e+01,   3.96900000e+02,   7.88000000e+00]])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "boston = datasets.load_boston()\n",
    "data = np.array(boston.data)"
@ -71,24 +50,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Boston House Prices dataset\n",
+      ".. _boston_dataset:\n",
      "\n",
-      "Notes\n",
+      "Boston house prices dataset\n",
-      "------\n",
+      "---------------------------\n",
-      "Data Set Characteristics:  \n",
+      "\n",
      "**Data Set Characteristics:**  \n",
      "\n",
      "    :Number of Instances: 506 \n",
      "\n",
-      "    :Number of Attributes: 13 numeric/categorical predictive\n",
+      "    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.\n",
      "    \n",
      "    :Median Value (attribute 14) is usually the target\n",
      "\n",
      "    :Attribute Information (in order):\n",
      "        - CRIM     per capita crime rate by town\n",
@ -111,7 +89,7 @@
      "    :Creator: Harrison, D. and Rubinfeld, D.L.\n",
      "\n",
      "This is a copy of UCI ML housing dataset.\n",
-      "http://archive.ics.uci.edu/ml/datasets/Housing\n",
+      "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/\n",
      "\n",
      "\n",
      "This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.\n",
@ -125,11 +103,10 @@
      "The Boston house-price data has been used in many machine learning papers that address regression\n",
      "problems.   \n",
      "     \n",
-      "**References**\n",
+      ".. topic:: References\n",
      "\n",
      "   - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.\n",
      "   - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.\n",
      "   - many more! (see http://archive.ics.uci.edu/ml/datasets/Housing)\n",
      "\n"
     ]
    }
@ -147,10 +124,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
-   "metadata": {
+   "metadata": {},
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "t = np.ones(len(data)).reshape(len(data),1)\n",
@ -167,10 +142,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
-   "metadata": {
+   "metadata": {},
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X,y = data[0:400,:], target[0:400]\n",
@ -223,12 +196,192 @@
    "    where $y'_i$ is your model prediction for the i-th example, and $n$ is the number of examples."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "least squares: $(X^T X)^{-1}X^T y $"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "least_squares = lambda x,y: inv(x.T.dot(x)).dot(x.T.dot(y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-1.91246374e-01,  4.42289967e-02,  5.52207977e-02,  1.71631351e+00,\n",
       "       -1.49957220e+01,  4.88773025e+00,  2.60921031e-03, -1.29480799e+00,\n",
       "        4.84787214e-01, -1.54006673e-02, -8.08795026e-01, -1.29230427e-03,\n",
       "       -5.17953791e-01,  2.86725996e+01])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "least_squares(X, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Ridge regression: ŵ  = (XᵀX + λI)⁻¹Xᵀy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def ridge_regression(x, y, lmb):\n",
    "    I = identity(len(X[0]))\n",
    "    return inv(x.T.dot(x) + lmb * I).dot(x.T).dot(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-1.92905668e-01,  4.45989360e-02,  4.80153773e-02,  1.70985336e+00,\n",
       "       -1.21920175e+01,  5.08501051e+00,  8.60369052e-04, -1.23267578e+00,\n",
       "        4.67418151e-01, -1.51800832e-02, -7.48061272e-01,  7.58288257e-04,\n",
       "       -5.09848508e-01,  2.39216289e+01])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ridge_regression(X, y, 0.1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Lasso: w* = argmin_w (y-X·w)ᵀ(y-X·w) + λ‖w‖₁"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def lasso(w, x, y, lmb):\n",
    "    return (y - x.dot(w)).T.dot(y-x.dot(w)) + lmb * sum(w)\n",
    "lasso_regression = lambda x, y, lmb: fmin_bfgs(lasso, zeros(len(x[0])), args= (x,y,lmb))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: Desired error not necessarily achieved due to precision loss.\n",
      "         Current function value: 8923.891671\n",
      "         Iterations: 18\n",
      "         Function evaluations: 1047\n",
      "         Gradient evaluations: 69\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([-1.91323083e-01,  4.42279444e-02,  5.53631382e-02,  1.71490995e+00,\n",
       "       -1.50058027e+01,  4.89083693e+00,  2.62969748e-03, -1.29453598e+00,\n",
       "        4.84604103e-01, -1.53869118e-02, -8.08349218e-01, -1.26958995e-03,\n",
       "       -5.17749228e-01,  2.86320548e+01])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lasso_regression(X,y,0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def S(actual, predicted):\n",
    "    from math import sqrt\n",
    "    return sqrt(sum((predicted[i] - actual[i])**2 for i in range(len(actual))) / len(actual))\n",
    "predicted = lambda x, w: x.dot(w)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: Desired error not necessarily achieved due to precision loss.\n",
      "         Current function value: 8922.270593\n",
      "         Iterations: 18\n",
      "         Function evaluations: 555\n",
      "         Gradient evaluations: 37\n",
      "Least squares training set s statistics: 4.722840838326382\n",
      "Least squares test set s statistics: 6.155792280412581\n",
      "Ridge regression training set s statistics: 4.734160907532518\n",
      "Ridge regression test set s statistics: 5.98737876633626\n",
      "Lasso regression training set s statistics: 4.722840845344215\n",
      "Lasso regression test set s statistics: 6.155671334655423\n"
     ]
    }
   ],
   "source": [
    "w_least = least_squares(X, y)\n",
    "w_ridge = ridge_regression(X, y, 0.01)\n",
    "w_ridge = ridge_regression(X, y, 0.2)\n",
    "w_lasso = lasso_regression(X, y, 0.01)\n",
    "print(\"Least squares training set s statistics:\", S(y, predicted(X, w_least)))\n",
    "print(\"Least squares test set s statistics:\", S(y_test, predicted(X_test, w_least)))\n",
    "print(\"Ridge regression training set s statistics:\", S(y, predicted(X, w_ridge)))\n",
    "print(\"Ridge regression test set s statistics:\", S(y_test, predicted(X_test, w_ridge)))\n",
    "print(\"Lasso regression training set s statistics:\", S(y, predicted(X, w_lasso)))\n",
    "print(\"Lasso regression test set s statistics:\", S(y_test, predicted(X_test, w_lasso)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
+   "metadata": {},
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
@ -250,7 +403,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.7.5"
+   "version": "3.7.7"
  }
 },
 "nbformat": 4,
--- a/anno3/apprendimento_automatico/esercizi/4/svm.ipynb
+++ b/anno3/apprendimento_automatico/esercizi/4/svm.ipynb
--- a/anno3/apprendimento_automatico/preparazione.org
+++ b/anno3/apprendimento_automatico/preparazione.org
@ -122,12 +122,13 @@ invertibile
 Possiamo inquadrare questo problema come un problema di minimizzazione
 della norma di e. p = X·$\hat{w}$: L'intero problema consiste in:
 | $minimize_{\hat{w}}\Vert X \hat{w} - y \Vert_2^2$
 | minimize_ŵ ‖Xŵ-y‖²₂
 La soluzione consiste nell'imporre l'ortogonalita` di e e C(X), ovvero
 Xᵀ·e=0; quindi:
 | Xᵀ·e = 0; e = y-X·ŵ
 | Xᵀ(y-X·ŵ) = 0
 | Xᵀy = XᵀXŵ
-| ŵ = (XᵀX)⁻¹Xᵀy
+| ŵ = (XᵀX)⁻¹Xᵀy (LSE)
 **** Regularization
 evitare l'overfitting applicando dei constraint sul weight vector.
 Generalmente i pesi sono in media piccoli: ~shrinkage~.
@ -140,7 +141,7 @@ XᵀX per migliorare la stabilita` numerica dell'inversione
 Si puo` anche usare ~lasso~ nel caso di soluzioni sparse
 (least absolute shrinkage and selection operator)
 che sostituisce ‖w‖₂ con ‖w‖₁=∑|wᵢ|
-| w* = argmin_w (y-X·w)ᵀ(y-X·w) + λ‖w‖1
+| w* = argmin_w (y-X·w)ᵀ(y-X·w) + λ‖w‖₁
 Minimizzare la norma significa immaginare che X sia affetto da errore
 D e minimizzare l'errore:
 | (X+D)w = Xw + Dw
@ -335,7 +336,7 @@ mai viste.
  Permette di trasformare un sistema induttivo in deduttivo
 ** TODO Path Through hyp. space
 Vedi che vuole sapere
-** TODO Trees
+** Trees
 ** Rules
 Ordered rules are a chain of /if-then-else/.
 #+BEGIN_SRC