210 lines
40 KiB
Text
210 lines
40 KiB
Text
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Support Vector Machines"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 61,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"%matplotlib inline\n",
|
||
|
"import numpy as np\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"from sklearn.svm import SVC"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 62,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"X = np.array([[ 0.46613554, 0.92048757],\n",
|
||
|
" [-0.92129195, 0.06723639],\n",
|
||
|
" [-0.15836636, 0.00430243],\n",
|
||
|
" [-0.24055905, -0.87032292],\n",
|
||
|
" [ 0.06245105, -0.53698416],\n",
|
||
|
" [-0.2265037 , -0.43835751],\n",
|
||
|
" [-0.00480479, -0.17372081],\n",
|
||
|
" [-0.1525277 , -0.34399658],\n",
|
||
|
" [-0.27360329, 0.35339202],\n",
|
||
|
" [-0.77464508, -0.48715511],\n",
|
||
|
" [-0.58724291, 0.74419972],\n",
|
||
|
" [-0.97596949, -0.72172963],\n",
|
||
|
" [ 0.42376225, -0.72655597],\n",
|
||
|
" [ 0.96383922, -0.23371331],\n",
|
||
|
" [ 0.16264643, -0.46949742],\n",
|
||
|
" [-0.74294705, -0.42576417],\n",
|
||
|
" [ 0.05089437, -0.20522071],\n",
|
||
|
" [-0.19442744, 0.09617478],\n",
|
||
|
" [-0.97102743, 0.79663992],\n",
|
||
|
" [ 0.0596995 , -0.70129219],\n",
|
||
|
" [-0.83934851, -0.95616033],\n",
|
||
|
" [-0.38249705, 0.4973605 ],\n",
|
||
|
" [ 0.3474666 , 0.70664397],\n",
|
||
|
" [ 0.35871444, 0.88679345],\n",
|
||
|
" [-0.05914582, 0.23124686],\n",
|
||
|
" [-0.52156643, 0.32986941],\n",
|
||
|
" [-0.53579646, 0.67530208],\n",
|
||
|
" [ 0.13683914, -0.96158184],\n",
|
||
|
" [ 0.65904541, -0.12015303],\n",
|
||
|
" [-0.69078363, 0.5615536 ],\n",
|
||
|
" [ 0.47738323, -0.70919275],\n",
|
||
|
" [ 0.93069669, 0.44019132],\n",
|
||
|
" [ 0.19750088, -0.68869404],\n",
|
||
|
" [-0.75048675, -0.18170522],\n",
|
||
|
" [-0.45288395, -0.25894991],\n",
|
||
|
" [-0.74644547, 0.87781953],\n",
|
||
|
" [ 0.14620452, 0.56864508],\n",
|
||
|
" [ 0.25719272, -0.58405476],\n",
|
||
|
" [ 0.87149524, 0.01384224],\n",
|
||
|
" [-0.71473576, 0.31568314],\n",
|
||
|
" [-0.252637 , -0.67418371],\n",
|
||
|
" [ 0.24718308, 0.95191416],\n",
|
||
|
" [-0.38149953, -0.64066291],\n",
|
||
|
" [-0.23112698, 0.04678807],\n",
|
||
|
" [ 0.72631766, 0.7390158 ],\n",
|
||
|
" [-0.91748062, -0.15131021],\n",
|
||
|
" [ 0.74957917, 0.66966866],\n",
|
||
|
" [ 0.76771849, 0.06662777],\n",
|
||
|
" [-0.04233756, -0.91320835],\n",
|
||
|
" [ 0.63840333, 0.06277738],\n",
|
||
|
" [-0.78887281, -0.90311183],\n",
|
||
|
" [-0.73099834, -0.69587363],\n",
|
||
|
" [-0.50947652, -0.99144951],\n",
|
||
|
" [ 0.14294609, 0.5474932 ],\n",
|
||
|
" [ 0.4367906 , 0.31953258],\n",
|
||
|
" [-0.13970851, 0.81817884],\n",
|
||
|
" [ 0.6440873 , 0.79118775],\n",
|
||
|
" [ 0.41714043, -0.66672029],\n",
|
||
|
" [ 0.59283022, -0.71836746],\n",
|
||
|
" [ 0.55379696, 0.98846202],\n",
|
||
|
" [-0.91819517, 0.34203895],\n",
|
||
|
" [ 0.02020188, 0.83696694],\n",
|
||
|
" [ 0.6182918 , 0.04254014],\n",
|
||
|
" [-0.09354765, -0.30050483],\n",
|
||
|
" [-0.08489545, 0.06431463],\n",
|
||
|
" [-0.11886358, -0.68738895],\n",
|
||
|
" [ 0.44428375, 0.18273761],\n",
|
||
|
" [ 0.26486362, -0.98398013],\n",
|
||
|
" [ 0.13222452, 0.91495035],\n",
|
||
|
" [-0.11101656, 0.00541343],\n",
|
||
|
" [-0.07696178, -0.92720555],\n",
|
||
|
" [ 0.22602214, 0.56040092],\n",
|
||
|
" [ 0.74227542, 0.32930104],\n",
|
||
|
" [ 0.43524657, 0.35332933],\n",
|
||
|
" [-0.89277607, -0.59996171],\n",
|
||
|
" [-0.94836212, 0.78777302],\n",
|
||
|
" [ 0.1783319 , -0.2142071 ],\n",
|
||
|
" [-0.07832238, -0.25046584],\n",
|
||
|
" [ 0.17611799, -0.96927832],\n",
|
||
|
" [-0.95938454, -0.26504646],\n",
|
||
|
" [ 0.58666766, -0.94620881],\n",
|
||
|
" [-0.77336565, 0.46735057],\n",
|
||
|
" [-0.94414054, 0.39044333],\n",
|
||
|
" [ 0.61524645, 0.15907662],\n",
|
||
|
" [-0.09855302, 0.9816656 ],\n",
|
||
|
" [ 0.53937097, 0.34487634]])"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 63,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"y = [\"red\" if x + y > 0.3 else \"green\" for [x,y] in X]\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 64,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<matplotlib.collections.PathCollection at 0x10b52ba50>"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 64,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAhMAAAFkCAYAAACabLnAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzs3Xd8jef/x/HXdc6JiBEjKkFIbNFIiara1drUrJHapUar\nWv3RFh3ab2t0UDrUqqJGKVpa1KhR1ErsUTN2UJuI5Nzn+v1xQiNWIufkzvg8H4/zkFz3ep8Tyfmc\n+77u61Jaa4QQQgghHpXF7ABCCCGESN+kmBBCCCFEikgxIYQQQogUkWJCCCGEECkixYQQQgghUkSK\nCSGEEEKkiBQTQgghhEgRKSaEEEIIkSJSTAghhBAiRaSYEEIIIUSKuK2YUErVVEotVEqdVEo5lFLN\nHrL+M/HrJXwYSqn87soohBBCiJRz55mJbMBW4NX475M6CUhJwC/+UQA45/poQgghhHAVm7t2rLVe\nAiwBUEolZ9N/tdaX3RJKCCGEEC6XFvtMbFNKnVJKLVVKVTU7jBBCCCEezG1nJh7BKaAnsAXICnQH\nVimlKmuttyZeWSnlA9QHIoGYVMwphBBCpHdZgUDgD631+ZTuLM0UE1rr/cD+BE1/K6WKA/2ATvfY\npD4wPTWyCSGEEBlUe2BGSneSZoqJ+9gMVLvPskiAH3/8kaCgoFQLlN7169ePUaNGmR0j3ZHXLfnk\nNXs08roln7xmybd37146dOgA8e+lKZXWi4nyOC9/3EsMQFBQEKGhoamXKJ3LlSuXvF6PQF635JPX\n7NHI65Z88pqliEu6CbitmFBKZcd5m+ctxZRS5YHzWuvjSqlhQEGtdef49d8ADgN7+K/PxDNAPXdl\nFEIIIUTKufPMRCXgz/ivNTAy/usfgJdwjiNROMH6HsAXQCEgGtgO1NFar3ZjRiGEEEKkkDvHmVjF\nA2491Vp3TfT9Z8Bn7sojhBBCCPdIi+NMCDcKCwszO0K6JK9b8slr9mjkdUs+ec3Mp7RO6ijXaYtS\nKhQIDw8Pl443QgghRDJERERQsWJFgIpa64iU7k/OTAghhBAiRaSYEEIIIUSKSDEhhBBCiBSRYkII\nIYQQKSLFhBBCCCFSRIoJIYQQQqSIFBNCCCGESBEpJoQQQgiRIlJMCCGEECJFpJgQQgghRIpIMSGE\nEEKIFJFiQgghhBApIsWEEEIIIVJEigkhhBBCpIgUE0IIIYRIESkmhBBCCJEiUkwIIYQQIkWkmBBC\nCCFEikgxIYQQQogUkWJCCCGEECkixYQQQgghUkSKCSGEEEKkiBQTQgghhEgRKSaEEEIIkSJSTAgh\nhBAiRaSYEEIIIUSKSDEhhBBCiBSRYkIIIYQQKSLFhBBCCCFSRIoJIYQQQqSIFBNCCCGESBEpJoQQ\nQgiRIlJMCCGEECJFpJgQQgghRIpIMSGEEEKIFJFiQgghhBApIsWEEEIIIVJEigkhhBBCpIgUE0II\nIYRIEbcVE0qpmkqphUqpk0oph1KqWRK2eUYpFaGUilFKHVBKdXZXPiGEEEK4hjvPTGQDtgKvxn+v\nH7SyUqoo8DuwAngC+BKYqJSq58aMQgghhEghm7t2rLVeAiwBUEolZZNewCGt9YD47/9RSlUH+gFL\n3RJSCCHSiWPHjjFv3jxiYmKoV68eoaGhZkcS4ra01GeiCrA8UdvS+HYhRCq5fv06I0eOpFb16tSq\nXp2RI0dy/fp1s2NlaqNHj6Zo0aIM/L//Y+i771KxYkU6deyIYRhmRxMCSFvFhC9wJlHbGcBbKeVp\nQh4hMp3r16/zTPXqvNO/P3nXrSPvunW8078/tWvWlILCJJs3b+aNN97gNYeDcw4HFwyDScD06dP5\n9ttvzY4nBODGyxyppV+/fuTKleuOtrCwMMLCwkxKJET69d1337Fjxw42ak2F+LYIrXl661bGjRvH\nm2++aWq+zGjy5MkUsdn4wm7HGt/2Es5ryJPG
jeO1114zMZ1ID2bOnMnMmTPvaLt8+bJLj5GWioko\nwC9Rmy9wRWt9834bjRo1Sq4dCuEiv86bRxOH43YhARAKNNaaX+bOTdfFRGRkJGPHjmXXzp0UCQig\nR48eVKhQ4eEbmuzs2bOUMIzbhcQtpbVm3dmzpmQS6cu9PmBHRERQsWJFlx0jLV3m+Bt4LlFbXWC9\nCVmEyJS0vvdNV0nqQp2GrV+/nnJlyzLhiy+wLl7M7xMn8mTFikybNs3saA/11FNPsU4pTiZoiwPm\nWa1Uevpps2IJcQd3jjORXSlVXilVPr6pWPz3heOXD1NKTUmwyXfx64xQSpVRSr0CtAZGuSujEOJO\nzVq25DeLhe0J2rYBvylFs5YtzYqVIlprerz0EuVu3uSoYbAAOGy3015revfsyZUrV8yO+EDdunXD\nJ18+alqtjAVmAM9ZLBxQioGDB5sdTwjAvWcmKgER8Q8NjIz/+sP45X5A4Vsra60jgcY4z0Zsw3lL\naDet9TI3ZhRCJNCrVy/KlSvHUxYLrYBWQGWLhZCQEHr27Gl2vEeyb98+dv/zD+86HOSMb7MBnwDX\nb9zgjz/+SNU8drudX3/9lU8//ZS5c+cSGxv7wPV9fHxYvW4dZerW5VWlaA9ElyvH4iVLqFy5cuqE\nFuIh3DnOxCoeUKxorbveo201zku0QggT5MiRg1Vr1zJ27Fh+nTcPgI9btqR3797kyJHDlEw7d+5k\n2NChrF21itx58tDppZfo27cvWbJkSdL2t96ssydqz5ZoeWo4cuQIDerUYf/hw3hbrVwxDAL9/Vm8\nbBllypS573YlSpTg98WLuXr1KrGxseTNmzep4/cIkSrU/a6RpnVKqVAgPDw8XDpgCpFBbdmyhVo1\nauBnt9Pabuc4MFspGjVuzC8LFiTpDdVut1O0cGGejIpiLv99wnkXGGG1cvzECfz8Evf9do8qlSpx\nbts2ZtvthAK7gLZWK5aSJdmxZ48UCCLVJOiAWVFrHZHS/aWlDphCCHGHwQMHUjwujp12O8OB6cBP\nWrPgt99YuXJlkvZhs9kYOWYMC5Qi1GrlbeBZi4VPgHffey/VCondu3ezYcsWvogvJACCga8Mg137\n9rFp06ZUySGEO0gxIdIErTVXr17F4XCYHUWkEYZhsGzFCl42jNuXJABaAP42G0uWLEnyvlq3bs3K\nVasIaNiQn/390dWq8dNPP/H+++8nK9O2bdto2qQJXp6e5M2Vi969e/Pvv/8madszZ5xj8gUlar/1\nfVRUVLKyCJGWSDEhTKW15quvvsK/iD/e3t745PPh3XffJS4uzuxowmRKKbLYbFxN1G4HbgCenskb\nGLdmzZr8unAhh44fZ+WaNbRp0yZZlxV27txJjapVObBkCR/GxtLryhVmT5hArWrVkjQ6aLly5fCw\n2ZiXqH0uzueaHsa8EOJ+pJgQpho2bBh9+/bllM8paAmXSl9i6PChdOvezexowmQWi4UXWrfma6uV\nI/FtGhgBnLfbad26darm+eTjj/GNjWWLYfAWMBT4yzDYd+BAksareOyxx+jVqxfvKcUAYDEwGPg/\ni4XOHTtSpEgR9z4BIdxIiglhmmvXrjF0+FB4Gue56xCgAegGmmlTp3Hw4EGTEwqzDR8xgqwFC1Ja\nKeooRRmbjfeAd999l5CQkFTNsnrFCtoZxh13hZQFqijF6tWrk7SPkaNG8fagQUzIkYNGwFfZsvFa\nv36MmzDBHZGFSDVpaThtkcns3buX61evQ7lEC8oBv8OGDRsoUaKEGdFEGuHv70/Ejh18//33rF27\nlup58jC+Uydq1aqV6lly5sxJ1Pnzd7RpIMpi4XFv7yTtw2az8fHHH/Pee+9x9uxZ8uXLh5eXlxvS\nCpG65MyEME3evHmdX1xKtCD+ex8fn1TNI1xvwYIFNKxXj+DSpWnXtu0j3bGQO3du3nzzTebNm8ek\nSZNMKSQA
OnTtynSLhRXx3xs4L7kcsttp3759svbl6elJ4cKFpZAQGYYUE8I0xYsXp0rVKthW2uBc\nfOMVsCy24FvAlzp16pi
|
||
|
"text/plain": [
|
||
|
"<matplotlib.figure.Figure at 0x10b07fad0>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"plt.scatter(X[:,0], X[:,1], c=y)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Exercise\n",
|
||
|
"\n",
|
||
|
"- import the support vector machine classifier from scikit-learn (the SVC class) and train a classifier for the examples above using a linear kernel;\n",
|
||
|
"- read the documentation to find out how to obtain the support vectors and the associated (dual) weights; use this information to analyze the learnt model: \n",
|
||
|
" - how many support vectors have been learnt? \n",
|
||
|
" - are they in the positions you would have expected [[1](#note1)]? \n",
|
||
|
" - is there any margin error?\n",
|
||
|
" - is there any classification error (check it using the classifier predictions)?\n",
|
||
|
"- learn a new SVC model using custom C values:\n",
|
||
|
" - how do the answers to the questions above change when you use a very high C value (e.g., 1000)?\n",
|
||
|
" - how do the answers to the questions above change when you use a very low C value (e.g., 0.3)?\n",
|
||
|
"- learn a new SVC model using an RBF kernel:\n",
|
||
|
" - is the new kernel able to capture the linear model?\n",
|
||
|
" - are you surprised by the above answer? Regardless of whether you are surprised or not: why?\n",
|
||
|
" \n",
|
||
|
"<a name=\"note1\">[1]</a> If you make two plots one after the other (in the same cell), the plots will be merged into a single one. You may want to use this feature to plot the support vectors on top of the scatter plot for the dataset."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {
|
||
|
"collapsed": true
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"anaconda-cloud": {},
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.7.5"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 1
|
||
|
}
|