366 lines
14 KiB
Text
366 lines
14 KiB
Text
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 81,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import numpy as np\n",
|
||
|
"import math\n",
|
||
|
"from copy import deepcopy\n",
|
||
|
"import sklearn.datasets\n",
|
||
|
"from sklearn.svm import SVC"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 64,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Fixed seed so the dataset (and every error reported below) is reproducible.\n",
|
||
|
"DATA_SEED = 42\n",
|
||
|
"# Number of leading samples used for training; the remainder is the test set.\n",
|
||
|
"N_TRAIN = 8000\n",
|
||
|
"\n",
|
||
|
"X, y = sklearn.datasets.make_hastie_10_2(random_state=DATA_SEED)\n",
|
||
|
"X_train, y_train = X[:N_TRAIN, :], y[:N_TRAIN]\n",
|
||
|
"X_test, y_test = X[N_TRAIN:, :], y[N_TRAIN:]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 72,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"class SVC_:\n",
|
||
|
"    \"\"\"Thin wrapper around sklearn's SVC that accepts normalized sample weights.\n",
|
||
|
"\n",
|
||
|
"    Weights passed to fit are multiplied by the number of samples before\n",
|
||
|
"    being handed to the underlying SVC.\n",
|
||
|
"    \"\"\"\n",
|
||
|
"\n",
|
||
|
"    def __init__(self, kernel=\"rbf\", degree=3):\n",
|
||
|
"        # SVC expects an integer degree, so coerce it (the old default was the string \"3\").\n",
|
||
|
"        self.svc = SVC(kernel=kernel, degree=int(degree))\n",
|
||
|
"\n",
|
||
|
"    def fit(self, X, y, sample_weight=None):\n",
|
||
|
"        \"\"\"Fit the wrapped SVC, rescaling sample_weight by len(X) when given.\"\"\"\n",
|
||
|
"        if sample_weight is not None:\n",
|
||
|
"            # Convert first: multiplying a plain Python list by len(X) would\n",
|
||
|
"            # repeat the list instead of scaling each weight.\n",
|
||
|
"            sample_weight = np.asarray(sample_weight, dtype=float) * len(X)\n",
|
||
|
"\n",
|
||
|
"        self.svc.fit(X, y, sample_weight=sample_weight)\n",
|
||
|
"        return self\n",
|
||
|
"\n",
|
||
|
"    def predict(self, X):\n",
|
||
|
"        \"\"\"Return the wrapped SVC's predictions for X.\"\"\"\n",
|
||
|
"        return self.svc.predict(X)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Exercise 1\n",
|
||
|
"\n",
|
||
|
"1. Implement the AdaBoost ensemble algorithm by completing the following code:"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 226,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"class AdaBoost:\n",
|
||
|
"    \"\"\"AdaBoost ensemble for binary labels in {-1, +1}.\n",
|
||
|
"\n",
|
||
|
"    weakModel must expose fit(X, y, sample_weight=w) and predict(X); a fresh\n",
|
||
|
"    deep copy of it is trained at each of the T boosting rounds.\n",
|
||
|
"    \"\"\"\n",
|
||
|
"\n",
|
||
|
"    # Keeps the weighted error away from 0 and 1 so log((1 - e) / e) stays finite.\n",
|
||
|
"    _EPS = 1e-10\n",
|
||
|
"\n",
|
||
|
"    def __init__(self, weakModel, T):\n",
|
||
|
"        self.model = weakModel\n",
|
||
|
"        self.models = []\n",
|
||
|
"        self.T = T\n",
|
||
|
"        self.a = []\n",
|
||
|
"\n",
|
||
|
"    def fit(self, X, y):\n",
|
||
|
"        \"\"\"Train T weak models on X, y and store them with their vote weights.\n",
|
||
|
"\n",
|
||
|
"        Prints the weighted error of the weak model every 10 rounds.\n",
|
||
|
"        Returns self.\n",
|
||
|
"        \"\"\"\n",
|
||
|
"        y = np.asarray(y)\n",
|
||
|
"        n_samples = len(y)\n",
|
||
|
"        # Start from scratch so that calling fit twice does not pile up models.\n",
|
||
|
"        self.models = []\n",
|
||
|
"        self.a = []\n",
|
||
|
"        # ndarray (not list) so weak learners can rescale the weights arithmetically.\n",
|
||
|
"        w = np.full(n_samples, 1.0 / n_samples)\n",
|
||
|
"        for t in range(self.T):\n",
|
||
|
"            model = deepcopy(self.model)\n",
|
||
|
"            model.fit(X, y, sample_weight=w)\n",
|
||
|
"            predictions = np.asarray(model.predict(X))\n",
|
||
|
"            self.models.append(model)\n",
|
||
|
"            # Weighted error: total weight of the misclassified examples.\n",
|
||
|
"            e = np.sum(w[predictions != y])\n",
|
||
|
"            if t % 10 == 0:\n",
|
||
|
"                print(\"Weighted Error:\", e)\n",
|
||
|
"            # Clip so a perfect (or perfectly wrong) weak model does not give\n",
|
||
|
"            # an infinite vote weight and NaN sample weights.\n",
|
||
|
"            e_safe = min(max(e, self._EPS), 1 - self._EPS)\n",
|
||
|
"            a = np.log((1 - e_safe) / e_safe) / 2\n",
|
||
|
"            self.a.append(a)\n",
|
||
|
"            # Up-weight mistakes, down-weight correct examples, then renormalize.\n",
|
||
|
"            w = w * np.exp(-a * y * predictions)\n",
|
||
|
"            w = w / np.sum(w)\n",
|
||
|
"        return self\n",
|
||
|
"\n",
|
||
|
"    def predict(self, X):\n",
|
||
|
"        \"\"\"Return the sign of the vote-weighted sum of the weak models' predictions.\"\"\"\n",
|
||
|
"        # Iterate over what was actually fitted rather than range(self.T).\n",
|
||
|
"        return np.sign(sum(a * model.predict(X) for a, model in zip(self.a, self.models)))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"In the implementation you are free to assume:\n",
|
||
|
"- that the problem is a binary classification problem with labels in $\\{-1, +1\\}$.\n",
|
||
|
"- that the weakModel can fit a weighted sample set by means of the call `weakModel.fit(X,y,sample_weight=w)` where `w` is a vector of length $|y|$."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"2. Test your implementation on the dataset loaded above and using an SVC with a polynomial kernel. "
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 227,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"C:\\Users\\galat\\.conda\\envs\\aaut\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
|
||
|
" \"avoid this warning.\", FutureWarning)\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Weighted Error: 0.49512499999995935\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"C:\\Users\\galat\\.conda\\envs\\aaut\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
|
||
|
" \"avoid this warning.\", FutureWarning)\n",
|
||
|
"C:\\Users\\galat\\.conda\\envs\\aaut\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
|
||
|
" \"avoid this warning.\", FutureWarning)\n",
|
||
|
"C:\\Users\\galat\\.conda\\envs\\aaut\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
|
||
|
" \"avoid this warning.\", FutureWarning)\n",
|
||
|
"C:\\Users\\galat\\.conda\\envs\\aaut\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
|
||
|
" \"avoid this warning.\", FutureWarning)\n",
|
||
|
"C:\\Users\\galat\\.conda\\envs\\aaut\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
|
||
|
" \"avoid this warning.\", FutureWarning)\n",
|
||
|
"C:\\Users\\galat\\.conda\\envs\\aaut\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
|
||
|
" \"avoid this warning.\", FutureWarning)\n",
|
||
|
"C:\\Users\\galat\\.conda\\envs\\aaut\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
|
||
|
" \"avoid this warning.\", FutureWarning)\n",
|
||
|
"C:\\Users\\galat\\.conda\\envs\\aaut\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
|
||
|
" \"avoid this warning.\", FutureWarning)\n",
|
||
|
"C:\\Users\\galat\\.conda\\envs\\aaut\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
|
||
|
" \"avoid this warning.\", FutureWarning)\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Boost ten copies of a cubic-polynomial SVC and predict the held-out labels.\n",
|
||
|
"# NOTE(review): the notes for this exercise recommend the SVC_ wrapper for weighted fitting; a bare SVC is used here - confirm this is intended.\n",
|
||
|
"weakModel = SVC(degree=3, kernel=\"poly\")\n",
|
||
|
"adaboost = AdaBoost(weakModel, 10).fit(X_train, y_train)\n",
|
||
|
"y_test_ = adaboost.predict(X_test)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 123,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"0.49425\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Misclassification rate via the {-1, +1} identity: 1/2 - (y . y') / (2N).\n",
|
||
|
"n_test = len(y_test)\n",
|
||
|
"print(0.5 - y_test.dot(y_test_) / (2 * n_test))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"3. Evaluate the AdaBoost performance as usual by calculating the classification error, and compare it with the classification error of the weak model.\n",
|
||
|
"\n",
|
||
|
"**Note 1**: \n",
|
||
|
"Since the labels are bound to be in $\\{+1, -1\\}$, the classification error (i.e., the number of incorrectly classified examples over the total number of examples) can be easily computed as:\n",
|
||
|
"$$\n",
|
||
|
" error(y,y') = \\frac{N - y \\cdot y'}{2N} = \\frac{1}{2} - \\frac{y \\cdot y'}{2N},\n",
|
||
|
"$$\n",
|
||
|
"where $N$ is the total number of examples. The formula can be derived noticing that $y \\cdot y'$ calculates the number $N_c$ of examples correctly classified minus the number $N_{\\bar c}$ of examples incorrectly classified. We have then $y \\cdot y' = N_c - N_{\\bar c}$ and by noticing that $N = N_c + N_{\\bar c}$:\n",
|
||
|
"$$\n",
|
||
|
" N - y \\cdot y' = N_c + N_{\\bar c} - N_c + N_{\\bar c} = 2 N_{\\bar c} \\Rightarrow \\frac{N - y \\cdot y'}{2 N} = \\frac{N_{\\bar c}}{N}\n",
|
||
|
"$$\n",
|
||
|
"\n",
|
||
|
"**Note 2**:\n",
|
||
|
"Do not forget to deepcopy your base model before fitting it to the new data.\n",
|
||
|
"\n",
|
||
|
"**Note 3**:\n",
|
||
|
"The SVC model allows specifying weights, but it *does not* work well when weights are normalized (it works well when the weights are larger). The following class takes normalized weights and denormalizes them before passing them to the SVC classifier:\n",
|
||
|
"\n",
|
||
|
"```python\n",
|
||
|
" class SVC_:\n",
|
||
|
" def __init__(self, kernel=\"rbf\", degree=\"3\"):\n",
|
||
|
" self.svc = SVC(kernel=kernel, degree=degree)\n",
|
||
|
"\n",
|
||
|
" def fit(self, X,y,sample_weight=None):\n",
|
||
|
" if sample_weight is not None:\n",
|
||
|
" sample_weight = sample_weight * len(X)\n",
|
||
|
"\n",
|
||
|
" self.svc.fit(X,y,sample_weight=sample_weight)\n",
|
||
|
" return self\n",
|
||
|
"\n",
|
||
|
" def predict(self, X):\n",
|
||
|
" return self.svc.predict(X)\n",
|
||
|
"```"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Exercise 2"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"1. Write a weak learner to be used with the AdaBoost algorithm you just wrote. The weak learner that you will implement is the most inaccurate weak learner possible: it basically works by extracting a linear model at random and trying to use that model to classify the examples. Being extracted at random the models it generates do not guarantee that the weighted error $\\epsilon_t$ is smaller than $0.5$. The algorithm solves this problem by flipping the decisions whenever it finds out that $\\epsilon_t > 0.5$ (i.e., if the weighted error is larger than $0.5$ it reverses the sign of all the weights so that the decision surface stays the same, but the regions where it predicts $+1$ and $-1$ are reversed).\n",
|
||
|
"\n",
|
||
|
" It shall work as follows:\n",
|
||
|
"\n",
|
||
|
" - it creates a random linear model by generating the needed weight vector $\\mathbf{w}$ at random (**note**: these are the weights of the linear model, they are *NOT* related in any way to the weights of the examples); each weight shall be sampled from U(-1,1);\n",
|
||
|
" - it evaluates the weighted loss $\\epsilon_t$ on the given dataset and flips the linear model if $\\epsilon_t > 0.5$;\n",
|
||
|
" - at prediction time it predicts +1 if $\\mathbf{x} \\cdot \\mathbf{w} > 0$; it predicts -1 otherwise."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 222,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"class RandomLinearModel:\n",
|
||
|
"    \"\"\"Weak learner that draws a random linear separator through the origin.\n",
|
||
|
"\n",
|
||
|
"    fit samples each coefficient of self.w from U(-1, 1) and flips the sign of\n",
|
||
|
"    w when the weighted error on the training data exceeds one half.\n",
|
||
|
"    \"\"\"\n",
|
||
|
"\n",
|
||
|
"    def loss(self, y, y_, sample_weight):\n",
|
||
|
"        \"\"\"Return the total sample weight of the examples where y and y_ differ.\"\"\"\n",
|
||
|
"        mismatched = np.asarray(y) != np.asarray(y_)\n",
|
||
|
"        return np.sum(np.asarray(sample_weight, dtype=float)[mismatched])\n",
|
||
|
"\n",
|
||
|
"    def fit(self, X, y, sample_weight=None):\n",
|
||
|
"        \"\"\"Draw random weights for the linear model and orient them using X, y.\n",
|
||
|
"\n",
|
||
|
"        sample_weight defaults to uniform weights 1 / len(X) when omitted or empty.\n",
|
||
|
"        Returns self.\n",
|
||
|
"        \"\"\"\n",
|
||
|
"        X = np.asarray(X)\n",
|
||
|
"        self.w = np.random.rand(X.shape[1]) * 2 - 1\n",
|
||
|
"        # None default instead of a shared mutable []; an empty sequence is still accepted.\n",
|
||
|
"        if sample_weight is None or len(sample_weight) == 0:\n",
|
||
|
"            sample_weight = np.full(len(X), 1.0 / len(X))\n",
|
||
|
"        sample_weight = np.asarray(sample_weight, dtype=float)\n",
|
||
|
"        # Flip when more than half of the total weight is misclassified; comparing\n",
|
||
|
"        # against the total keeps this correct for weights that do not sum to 1.\n",
|
||
|
"        if self.loss(y, self.predict(X), sample_weight) > 0.5 * np.sum(sample_weight):\n",
|
||
|
"            self.w *= -1\n",
|
||
|
"        return self\n",
|
||
|
"\n",
|
||
|
"    def predict(self, X):\n",
|
||
|
"        \"\"\"Predict +1 where x . w > 0 and -1 otherwise.\"\"\"\n",
|
||
|
"        # np.sign would return 0 for points exactly on the boundary.\n",
|
||
|
"        return np.where(np.asarray(X).dot(self.w) > 0, 1.0, -1.0)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 228,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"0.487\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Fit one random linear model on its own and report its test error.\n",
|
||
|
"rs = RandomLinearModel().fit(X_train, y_train)\n",
|
||
|
"predictions = rs.predict(X_test)\n",
|
||
|
"print(0.5 - y_test.dot(predictions) / (2 * len(y_test)))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"2. Learn an AdaBoost model using the RandomLinearModel weak learner printing every $K$ iterations the weighted error and the current error of the ensemble (you are free to choose $K$ so to make your output just frequent enough to let you know what is happening but without flooding the console with messages). Evaluate the training and test error of the final ensemble model."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 229,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Weighted Error: 0.49524999999995933\n",
|
||
|
"Weighted Error: 0.4948541341954795\n",
|
||
|
"Weighted Error: 0.49729398392530305\n",
|
||
|
"Weighted Error: 0.49980867302257964\n",
|
||
|
"Weighted Error: 0.49683487146024025\n",
|
||
|
"Weighted Error: 0.49790489175815233\n",
|
||
|
"Weighted Error: 0.4940625587347454\n",
|
||
|
"Weighted Error: 0.4950371378338745\n",
|
||
|
"Weighted Error: 0.4909255291281916\n",
|
||
|
"Weighted Error: 0.4960331784908466\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Boost 100 random linear models, then predict on both splits.\n",
|
||
|
"rs = RandomLinearModel()\n",
|
||
|
"a = AdaBoost(rs, 100).fit(X_train, y_train)\n",
|
||
|
"\n",
|
||
|
"y_train_, y_test_ = a.predict(X_train), a.predict(X_test)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 232,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Training Error: 0.462125\n",
|
||
|
"Test Error: 0.49375\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Error rate via 1/2 - (y . y') / (2N), valid because labels are in {-1, +1}.\n",
|
||
|
"for label, y_true, y_pred in ((\"Training Error:\", y_train, y_train_), (\"Test Error:\", y_test, y_test_)):\n",
|
||
|
"    print(label, 0.5 - y_true.dot(y_pred)/(2 * len(y_true)))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"3. Write a few paragraphs about what you think about the experiment and about the results you obtained."
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"anaconda-cloud": {},
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.7.5"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 1
|
||
|
}
|