{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from scipy import stats\n", "import numpy as np\n", "\n", "data='../3classdata/W/1.csv'\n", "window=3\n", "resolution=500\n", "shift=3\n", "\n", "import glob\n", "from functools import partial\n", " " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([-1. , -0.48715223, -0.40806948, -0.3564728 , -0.31673373,\n", " -0.28370106, -0.25499594, -0.22931092, -0.20584419, -0.18406497,\n", " -0.16360107, -0.14417942, -0.1255919 , -0.10767457, -0.09029443,\n", " -0.07334053, -0.05671771, -0.04034218, -0.02413812, -0.00803494,\n", " 0.00803494, 0.02413812, 0.04034218, 0.05671771, 0.07334053,\n", " 0.09029443, 0.10767457, 0.1255919 , 0.14417942, 0.16360107,\n", " 0.18406497, 0.20584419, 0.22931092, 0.25499594, 0.28370106,\n", " 0.31673373, 0.3564728 , 0.40806948, 0.48715223, 1. ])" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def gaussian_interval(resolution):\n", " # use a gaussian distribution, mean 0, std deviation 0.25\n", " distribution = stats.norm(loc=0, scale=0.25)\n", " # bounds of range for inverse comulative distribution function\n", " bounds = distribution.cdf([-1, 1])\n", " # generate linear space of 2*resolution points using bounds\n", " linsp = np.linspace(*bounds, num=2*resolution)\n", " # obtain the array of 2*resolution points\n", " return distribution.ppf(linsp)\n", "gaussian_interval(20)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(data, dtype=float, header=None)\n", "# rotate dataframe (each column is a temporal sequence)\n", "df = df.T.reset_index().reindex(axis=1).drop(['index'], axis=1).infer_objects()\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012345678910111213141516171819
0-0.794245-1.0000000.828071-0.9941680.9447980.668407-0.373019-0.1506700.8471870.9930110.9976490.4248280.707602-0.548011-0.897720-0.9961910.7333331.000000-1.000000-1.000000
1-0.773646-0.9820510.838096-1.0000000.7027600.490809-0.5362550.5223350.9496221.0000001.0000000.4167610.719298-0.696552-0.840543-1.0000000.7212120.707079-0.989454-0.998286
2-0.704494-0.9417070.867532-0.9715830.5159240.189991-0.6675240.5570230.9462640.9405910.540661-0.1312390.754386-0.875862-0.722166-0.9907370.7151520.664768-0.700204-0.977304
3-0.702488-0.7276780.847612-0.9567490.222930-0.078956-0.7595380.5732420.9143580.871396-0.152875-0.2462310.766082-0.839788-0.739604-0.9377560.6606060.243287-0.358765-0.947124
4-0.653693-0.4749160.893637-0.896301-0.210191-0.359162-0.840516-1.0000000.9932830.808842-0.618419-0.5358110.812865-0.818568-0.519115-0.8232840.6303030.222132-0.169913-0.810559
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", "0 -0.794245 -1.000000 0.828071 -0.994168 0.944798 0.668407 -0.373019 \n", "1 -0.773646 -0.982051 0.838096 -1.000000 0.702760 0.490809 -0.536255 \n", "2 -0.704494 -0.941707 0.867532 -0.971583 0.515924 0.189991 -0.667524 \n", "3 -0.702488 -0.727678 0.847612 -0.956749 0.222930 -0.078956 -0.759538 \n", "4 -0.653693 -0.474916 0.893637 -0.896301 -0.210191 -0.359162 -0.840516 \n", "\n", " 7 8 9 10 11 12 13 \\\n", "0 -0.150670 0.847187 0.993011 0.997649 0.424828 0.707602 -0.548011 \n", "1 0.522335 0.949622 1.000000 1.000000 0.416761 0.719298 -0.696552 \n", "2 0.557023 0.946264 0.940591 0.540661 -0.131239 0.754386 -0.875862 \n", "3 0.573242 0.914358 0.871396 -0.152875 -0.246231 0.766082 -0.839788 \n", "4 -1.000000 0.993283 0.808842 -0.618419 -0.535811 0.812865 -0.818568 \n", "\n", " 14 15 16 17 18 19 \n", "0 -0.897720 -0.996191 0.733333 1.000000 -1.000000 -1.000000 \n", "1 -0.840543 -1.000000 0.721212 0.707079 -0.989454 -0.998286 \n", "2 -0.722166 -0.990737 0.715152 0.664768 -0.700204 -0.977304 \n", "3 -0.739604 -0.937756 0.660606 0.243287 -0.358765 -0.947124 \n", "4 -0.519115 -0.823284 0.630303 0.222132 -0.169913 -0.810559 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = ((df - df.min()) / (df.max() - df.min()))*2 - 1\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [], "source": [ "#df = df.apply(partial(\n", " # discretize_series, gaussianInterval=gaussian_interval(resolution)))\n", "#df.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "l = df.apply(lambda x : pd.cut(x, gaussian_interval(resolution), right=True).apply(lambda l : l.left))\n", "r = df.apply(lambda x : pd.cut(x, gaussian_interval(resolution), right=True).apply(lambda l : l.right))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "df = l" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012345678910111213141516171819
0-1.0000-1.000000.770200-1.000000.7702000.66230-0.37440-0.150700.770200.770200.77020.4237000.6860-0.549000-1.00000-1.000000.7182000.77020-1.00000-1.00000
1-1.0000-1.000000.770200-1.000000.6860000.48980-0.542200.518400.770200.770200.77020.4159000.7182-0.718200-1.00000-1.000000.7182000.68600-1.00000-1.00000
2-0.7182-1.000000.770200-1.000000.5132000.18950-0.686000.556200.770200.770200.5358-0.1316000.7182-1.000000-0.77020-1.000000.6860000.66230-0.71820-1.00000
3-0.7182-0.770200.770200-1.000000.222100-0.07941-0.770200.572200.770200.77020-0.1530-0.2464000.7182-1.000000-0.77020-1.000000.6433000.24230-0.35970-1.00000
4-0.6623-0.477500.770200-1.00000-0.210200-0.35970-1.00000-1.000000.770200.77020-0.6275-0.5358000.7702-1.000000-0.52400-1.000000.6275000.22210-0.17000-1.00000
5-0.2886-0.718200.770200-0.77020-0.010980-0.44960-1.000000.686000.770200.71820-0.7182-0.5033000.7702-1.000000-0.27670-0.718200.6275000.28620-0.05148-0.48980
6-0.1266-0.374400.770200-1.000000.180400-0.60180-1.000000.039360.770200.60180-1.0000-1.0000000.7702-1.0000000.07677-0.643300.041910-0.190300.29480-0.53580
70.1614-0.045730.662300-0.770200.302500-0.71820-1.00000-0.094060.770200.64330-1.0000-1.0000000.7702-1.0000000.48980-0.51840-0.303800-0.572200.14400-0.39490
80.49410.249500.386400-0.62750-0.036190-0.77020-1.000000.114600.770200.66230-0.6275-1.0000000.7702-1.0000000.77020-0.41350-0.643300-0.718200.47750-0.16840
90.50810.542200.179600-0.27560-0.513200-1.00000-1.000000.456000.770200.64330-0.1008-1.0000000.7702-1.0000000.77020-0.35110-0.770200-0.770200.61380-0.01601
100.60180.443400.001568-0.27910-0.494100-1.00000-1.000000.718200.718200.524000.1188-1.0000000.7702-1.0000000.77020-0.26110-1.000000-1.000000.66230-0.05212
110.66230.37830-0.198800-0.02671-0.518400-1.00000-1.000000.770200.563900.326000.3949-0.6860000.7702-1.0000000.77020-0.15070-1.000000-1.000000.71820-0.07809
120.77020.66230-0.4086000.17800-0.326000-1.00000-1.000000.643300.07415-0.252600.2165-1.0000000.7702-0.3823000.770200.15530-1.000000-1.000000.718200.52400
130.77020.77020-0.2948000.529700.235400-1.00000-1.000000.397100.27220-0.718200.0286-0.6623000.7182-0.1962000.770200.34130-1.000000-1.000000.718200.54900
140.77020.77020-0.2258000.770200.601800-1.00000-1.000000.30120-0.20750-1.000000.3844-0.5722000.56390.0682700.770200.77020-1.000000-1.000000.718200.71820
150.77020.77020-0.6138000.770200.718200-1.00000-1.000000.27910-0.51840-1.000000.5639-0.4941000.21560.4700000.770200.51840-1.000000-1.000000.770200.77020
160.77020.77020-1.0000000.770200.770200-1.00000-1.000000.39060-0.34130-1.000000.6275-0.3994000.15760.7702000.718200.71820-1.000000-0.770200.718200.77020
170.77020.71820-1.0000000.770200.770200-1.00000-1.000000.61380-0.57220-0.718200.7182-0.4628000.13450.7702000.686000.77020-1.000000-0.770200.770200.77020
180.77020.62750-1.0000000.770200.770200-0.66230-1.000000.77020-0.71820-0.363300.7702-0.423700-0.33350.7702000.581200.77020-1.000000-0.770200.770200.77020
190.77020.52400-1.0000000.770200.354500-0.47750-1.000000.77020-0.68600-0.397100.7702-0.310500-0.61380.7702000.572200.77020-1.000000-1.000000.770200.77020
200.77020.44040-1.0000000.770200.001568-0.24130-1.000000.77020-0.66230-0.320200.7702-0.397100-0.55620.7702000.518400.77020-1.000000-0.770200.770200.77020
210.77020.57220-1.0000000.77020-0.202300-0.08404-1.000000.77020-0.71820-0.277900.7702-0.418500-0.55620.7702000.524000.77020-1.000000-0.770200.770200.77020
220.77020.64330-1.0000000.77020-0.2403000.21290-1.000000.71820-0.77020-0.274400.7702-0.440400-0.53580.5132000.518400.77020-0.770200-0.770200.770200.77020
230.77020.71820-1.0000000.45940-0.1812000.31880-1.000000.64330-1.00000-0.258900.7702-0.470000-0.46280.2156000.529700.77020-0.627500-0.627500.770200.77020
240.77020.71820-1.0000000.446400.0015680.48560-0.342900.50330-1.00000-0.265400.7702-0.408600-0.44040.2814000.529700.77020-0.556200-0.449600.770200.77020
250.66230.77020-1.0000000.277900.2474000.613800.192000.45280-1.00000-0.272200.77020.095410-0.41590.2886000.549000.77020-0.4628000.122300.770200.77020
260.26220.77020-1.0000000.122300.3744000.718200.470000.43750-1.00000-0.279100.77020.301200-0.42900.2710000.556200.77020-0.3885000.191200.718200.77020
270.24640.77020-1.000000-0.091380.4856000.770200.718200.41590-1.00000-0.277900.77020.770200-0.39280.2515000.572200.77020-0.2973000.085370.718200.77020
28-0.22390.66230-1.000000-0.143300.4159000.770200.770200.37640-1.00000-0.249500.77020.770200-0.44040.2165000.563900.77020-0.0728400.293600.572200.77020
29-0.34940.56390-0.718200-0.261100.2779000.770200.770200.36880-1.00000-0.276700.44960.770200-0.51320.1359000.556200.44040-0.0368300.401600.288600.68600
30-0.77020.33660-0.643300-0.29480-0.0489200.770200.770200.33350-1.00000-0.28140-0.15220.770200-0.57220.1591000.513200.51840-0.0248200.384400.313200.50330
31-1.00000.36150-0.459400-0.31880-0.2023000.770200.770200.33500-1.00000-0.25570-0.47750.718200-0.7182-0.0728400.437500.466400.0059590.39280-0.462800.52970
32-1.00000.20750-0.386400-0.37060-0.5422000.770200.770200.33660-1.00000-0.17480-0.15530.426300-1.0000-0.0210400.302500.392800.0604900.44040-0.770200.57220
33-1.00000.08073-0.170800-0.37640-0.5910000.770200.556200.30910-1.00000-0.056620.18620.485600-1.00000.0053320.292300.307800.4290000.46280-0.429000.62750
34-1.00000.09947-0.007214-0.38030-0.7182000.770200.055980.27440-1.000000.019150.39490.542200-1.00000.0406400.073490.257900.6275000.64330-0.718200.66230
35-1.0000-0.253600.120200-0.38030-1.0000000.77020-0.443400.27330-0.770200.084040.44960.503300-1.00000.065020-0.217400.141800.7702000.60180-0.643300.53580
36-1.0000-0.212000.293600-0.38030-1.0000000.77020-0.449600.26430-0.770200.116000.46640.462800-1.00000.127300-0.508100.078090.7702000.71820-0.572200.45280
37-1.0000-0.293600.317400-0.39060-1.0000000.77020-0.643300.24740-0.770200.146200.51840.459400-1.0000-0.032390-1.000000.047010.7702000.77020-0.770200.29860
38-1.0000-0.366900.318800-0.39280-1.0000000.77020-0.627500.22300-0.770200.082050.1029-0.002195-1.0000-0.129400-0.77020-0.032390.7702000.77020-1.000000.15000
39-1.0000-0.349400.380300-0.40160-1.0000000.77020-0.718200.20670-0.770200.02419-0.4775-0.581200-1.0000-0.118800-1.00000-0.012240.7702000.77020-1.00000-0.05020
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 \\\n", "0 -1.0000 -1.00000 0.770200 -1.00000 0.770200 0.66230 -0.37440 -0.15070 \n", "1 -1.0000 -1.00000 0.770200 -1.00000 0.686000 0.48980 -0.54220 0.51840 \n", "2 -0.7182 -1.00000 0.770200 -1.00000 0.513200 0.18950 -0.68600 0.55620 \n", "3 -0.7182 -0.77020 0.770200 -1.00000 0.222100 -0.07941 -0.77020 0.57220 \n", "4 -0.6623 -0.47750 0.770200 -1.00000 -0.210200 -0.35970 -1.00000 -1.00000 \n", "5 -0.2886 -0.71820 0.770200 -0.77020 -0.010980 -0.44960 -1.00000 0.68600 \n", "6 -0.1266 -0.37440 0.770200 -1.00000 0.180400 -0.60180 -1.00000 0.03936 \n", "7 0.1614 -0.04573 0.662300 -0.77020 0.302500 -0.71820 -1.00000 -0.09406 \n", "8 0.4941 0.24950 0.386400 -0.62750 -0.036190 -0.77020 -1.00000 0.11460 \n", "9 0.5081 0.54220 0.179600 -0.27560 -0.513200 -1.00000 -1.00000 0.45600 \n", "10 0.6018 0.44340 0.001568 -0.27910 -0.494100 -1.00000 -1.00000 0.71820 \n", "11 0.6623 0.37830 -0.198800 -0.02671 -0.518400 -1.00000 -1.00000 0.77020 \n", "12 0.7702 0.66230 -0.408600 0.17800 -0.326000 -1.00000 -1.00000 0.64330 \n", "13 0.7702 0.77020 -0.294800 0.52970 0.235400 -1.00000 -1.00000 0.39710 \n", "14 0.7702 0.77020 -0.225800 0.77020 0.601800 -1.00000 -1.00000 0.30120 \n", "15 0.7702 0.77020 -0.613800 0.77020 0.718200 -1.00000 -1.00000 0.27910 \n", "16 0.7702 0.77020 -1.000000 0.77020 0.770200 -1.00000 -1.00000 0.39060 \n", "17 0.7702 0.71820 -1.000000 0.77020 0.770200 -1.00000 -1.00000 0.61380 \n", "18 0.7702 0.62750 -1.000000 0.77020 0.770200 -0.66230 -1.00000 0.77020 \n", "19 0.7702 0.52400 -1.000000 0.77020 0.354500 -0.47750 -1.00000 0.77020 \n", "20 0.7702 0.44040 -1.000000 0.77020 0.001568 -0.24130 -1.00000 0.77020 \n", "21 0.7702 0.57220 -1.000000 0.77020 -0.202300 -0.08404 -1.00000 0.77020 \n", "22 0.7702 0.64330 -1.000000 0.77020 -0.240300 0.21290 -1.00000 0.71820 \n", "23 0.7702 0.71820 -1.000000 0.45940 -0.181200 0.31880 -1.00000 0.64330 \n", "24 0.7702 0.71820 -1.000000 0.44640 0.001568 0.48560 -0.34290 0.50330 \n", "25 0.6623 0.77020 -1.000000 0.27790 0.247400 0.61380 0.19200 0.45280 \n", "26 0.2622 0.77020 -1.000000 0.12230 0.374400 0.71820 0.47000 0.43750 \n", "27 0.2464 0.77020 -1.000000 -0.09138 0.485600 0.77020 0.71820 0.41590 \n", "28 -0.2239 0.66230 -1.000000 -0.14330 0.415900 0.77020 0.77020 0.37640 \n", "29 -0.3494 0.56390 -0.718200 -0.26110 0.277900 0.77020 0.77020 0.36880 \n", "30 -0.7702 0.33660 -0.643300 -0.29480 -0.048920 0.77020 0.77020 0.33350 \n", "31 -1.0000 0.36150 -0.459400 -0.31880 -0.202300 0.77020 0.77020 0.33500 \n", "32 -1.0000 0.20750 -0.386400 -0.37060 -0.542200 0.77020 0.77020 0.33660 \n", "33 -1.0000 0.08073 -0.170800 -0.37640 -0.591000 0.77020 0.55620 0.30910 \n", "34 -1.0000 0.09947 -0.007214 -0.38030 -0.718200 0.77020 0.05598 0.27440 \n", "35 -1.0000 -0.25360 0.120200 -0.38030 -1.000000 0.77020 -0.44340 0.27330 \n", "36 -1.0000 -0.21200 0.293600 -0.38030 -1.000000 0.77020 -0.44960 0.26430 \n", "37 -1.0000 -0.29360 0.317400 -0.39060 -1.000000 0.77020 -0.64330 0.24740 \n", "38 -1.0000 -0.36690 0.318800 -0.39280 -1.000000 0.77020 -0.62750 0.22300 \n", "39 -1.0000 -0.34940 0.380300 -0.40160 -1.000000 0.77020 -0.71820 0.20670 \n", "\n", " 8 9 10 11 12 13 14 15 \\\n", "0 0.77020 0.77020 0.7702 0.423700 0.6860 -0.549000 -1.00000 -1.00000 \n", "1 0.77020 0.77020 0.7702 0.415900 0.7182 -0.718200 -1.00000 -1.00000 \n", "2 0.77020 0.77020 0.5358 -0.131600 0.7182 -1.000000 -0.77020 -1.00000 \n", "3 0.77020 0.77020 -0.1530 -0.246400 0.7182 -1.000000 -0.77020 -1.00000 \n", "4 0.77020 0.77020 -0.6275 -0.535800 0.7702 -1.000000 -0.52400 -1.00000 \n", "5 0.77020 0.71820 -0.7182 -0.503300 0.7702 -1.000000 -0.27670 -0.71820 \n", "6 0.77020 0.60180 -1.0000 -1.000000 0.7702 -1.000000 0.07677 -0.64330 \n", "7 0.77020 0.64330 -1.0000 -1.000000 0.7702 -1.000000 0.48980 -0.51840 \n", "8 0.77020 0.66230 -0.6275 -1.000000 0.7702 -1.000000 0.77020 -0.41350 \n", "9 0.77020 0.64330 -0.1008 -1.000000 0.7702 -1.000000 0.77020 -0.35110 \n", "10 0.71820 0.52400 0.1188 -1.000000 0.7702 -1.000000 0.77020 -0.26110 \n", "11 0.56390 0.32600 0.3949 -0.686000 0.7702 -1.000000 0.77020 -0.15070 \n", "12 0.07415 -0.25260 0.2165 -1.000000 0.7702 -0.382300 0.77020 0.15530 \n", "13 0.27220 -0.71820 0.0286 -0.662300 0.7182 -0.196200 0.77020 0.34130 \n", "14 -0.20750 -1.00000 0.3844 -0.572200 0.5639 0.068270 0.77020 0.77020 \n", "15 -0.51840 -1.00000 0.5639 -0.494100 0.2156 0.470000 0.77020 0.51840 \n", "16 -0.34130 -1.00000 0.6275 -0.399400 0.1576 0.770200 0.71820 0.71820 \n", "17 -0.57220 -0.71820 0.7182 -0.462800 0.1345 0.770200 0.68600 0.77020 \n", "18 -0.71820 -0.36330 0.7702 -0.423700 -0.3335 0.770200 0.58120 0.77020 \n", "19 -0.68600 -0.39710 0.7702 -0.310500 -0.6138 0.770200 0.57220 0.77020 \n", "20 -0.66230 -0.32020 0.7702 -0.397100 -0.5562 0.770200 0.51840 0.77020 \n", "21 -0.71820 -0.27790 0.7702 -0.418500 -0.5562 0.770200 0.52400 0.77020 \n", "22 -0.77020 -0.27440 0.7702 -0.440400 -0.5358 0.513200 0.51840 0.77020 \n", "23 -1.00000 -0.25890 0.7702 -0.470000 -0.4628 0.215600 0.52970 0.77020 \n", "24 -1.00000 -0.26540 0.7702 -0.408600 -0.4404 0.281400 0.52970 0.77020 \n", "25 -1.00000 -0.27220 0.7702 0.095410 -0.4159 0.288600 0.54900 0.77020 \n", "26 -1.00000 -0.27910 0.7702 0.301200 -0.4290 0.271000 0.55620 0.77020 \n", "27 -1.00000 -0.27790 0.7702 0.770200 -0.3928 0.251500 0.57220 0.77020 \n", "28 -1.00000 -0.24950 0.7702 0.770200 -0.4404 0.216500 0.56390 0.77020 \n", "29 -1.00000 -0.27670 0.4496 0.770200 -0.5132 0.135900 0.55620 0.44040 \n", "30 -1.00000 -0.28140 -0.1522 0.770200 -0.5722 0.159100 0.51320 0.51840 \n", "31 -1.00000 -0.25570 -0.4775 0.718200 -0.7182 -0.072840 0.43750 0.46640 \n", "32 -1.00000 -0.17480 -0.1553 0.426300 -1.0000 -0.021040 0.30250 0.39280 \n", "33 -1.00000 -0.05662 0.1862 0.485600 -1.0000 0.005332 0.29230 0.30780 \n", "34 -1.00000 0.01915 0.3949 0.542200 -1.0000 0.040640 0.07349 0.25790 \n", "35 -0.77020 0.08404 0.4496 0.503300 -1.0000 0.065020 -0.21740 0.14180 \n", "36 -0.77020 0.11600 0.4664 0.462800 -1.0000 0.127300 -0.50810 0.07809 \n", "37 -0.77020 0.14620 0.5184 0.459400 -1.0000 -0.032390 -1.00000 0.04701 \n", "38 -0.77020 0.08205 0.1029 -0.002195 -1.0000 -0.129400 -0.77020 -0.03239 \n", "39 -0.77020 0.02419 -0.4775 -0.581200 -1.0000 -0.118800 -1.00000 -0.01224 \n", "\n", " 16 17 18 19 \n", "0 0.718200 0.77020 -1.00000 -1.00000 \n", "1 0.718200 0.68600 -1.00000 -1.00000 \n", "2 0.686000 0.66230 -0.71820 -1.00000 \n", "3 0.643300 0.24230 -0.35970 -1.00000 \n", "4 0.627500 0.22210 -0.17000 -1.00000 \n", "5 0.627500 0.28620 -0.05148 -0.48980 \n", "6 0.041910 -0.19030 0.29480 -0.53580 \n", "7 -0.303800 -0.57220 0.14400 -0.39490 \n", "8 -0.643300 -0.71820 0.47750 -0.16840 \n", "9 -0.770200 -0.77020 0.61380 -0.01601 \n", "10 -1.000000 -1.00000 0.66230 -0.05212 \n", "11 -1.000000 -1.00000 0.71820 -0.07809 \n", "12 -1.000000 -1.00000 0.71820 0.52400 \n", "13 -1.000000 -1.00000 0.71820 0.54900 \n", "14 -1.000000 -1.00000 0.71820 0.71820 \n", "15 -1.000000 -1.00000 0.77020 0.77020 \n", "16 -1.000000 -0.77020 0.71820 0.77020 \n", "17 -1.000000 -0.77020 0.77020 0.77020 \n", "18 -1.000000 -0.77020 0.77020 0.77020 \n", "19 -1.000000 -1.00000 0.77020 0.77020 \n", "20 -1.000000 -0.77020 0.77020 0.77020 \n", "21 -1.000000 -0.77020 0.77020 0.77020 \n", "22 -0.770200 -0.77020 0.77020 0.77020 \n", "23 -0.627500 -0.62750 0.77020 0.77020 \n", "24 -0.556200 -0.44960 0.77020 0.77020 \n", "25 -0.462800 0.12230 0.77020 0.77020 \n", "26 -0.388500 0.19120 0.71820 0.77020 \n", "27 -0.297300 0.08537 0.71820 0.77020 \n", "28 -0.072840 0.29360 0.57220 0.77020 \n", "29 -0.036830 0.40160 0.28860 0.68600 \n", "30 -0.024820 0.38440 0.31320 0.50330 \n", "31 0.005959 0.39280 -0.46280 0.52970 \n", "32 0.060490 0.44040 -0.77020 0.57220 \n", "33 0.429000 0.46280 -0.42900 0.62750 \n", "34 0.627500 0.64330 -0.71820 0.66230 \n", "35 0.770200 0.60180 -0.64330 0.53580 \n", "36 0.770200 0.71820 -0.57220 0.45280 \n", "37 0.770200 0.77020 -0.77020 0.29860 \n", "38 0.770200 0.77020 -1.00000 0.15000 \n", "39 0.770200 0.77020 -1.00000 -0.05020 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "def generate_words(col, w, s):\n", " word = []\n", " idx = 0\n", " distance = abs(w-s)\n", " while idx < len(col):\n", " if len(word) == w:\n", " yield word\n", " word = []\n", " if w < s and idx == w:\n", " idx += distance\n", " if w > s:\n", " idx -= distance \n", " else:\n", " word.append(col[idx])\n", " idx+=1" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[-1.0, -1.0, -0.7182], [-0.2886, -0.1266, 0.1614], [0.4941, 0.5081, 0.6018], [0.6623, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.6623], [0.2622, 0.2464, -0.2239], [-0.3494, -0.7702, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0]]\n", "[[-1.0, -1.0, -1.0], [-0.7182, -0.3744, -0.04573], [0.2495, 0.5422, 0.4434], [0.3783, 0.6623, 0.7702], [0.7702, 0.7702, 0.7702], [0.7182, 0.6275, 0.524], [0.4404, 0.5722, 0.6433], [0.7182, 0.7182, 0.7702], [0.7702, 0.7702, 0.6623], [0.5639, 0.3366, 0.3615], [0.2075, 0.08073, 0.09947], [-0.2536, -0.212, -0.2936]]\n", "[[0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.6623], [0.3864, 0.1796, 0.001568], [-0.1988, -0.4086, -0.2948], [-0.2258, -0.6138, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-0.7182, -0.6433, -0.4594], [-0.3864, -0.1708, -0.007214], [0.1202, 0.2936, 0.3174]]\n", "[[-1.0, -1.0, -1.0], [-0.7702, -1.0, -0.7702], [-0.6275, -0.2756, -0.2791], [-0.02671, 0.178, 0.5297], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.4594, 0.4464, 0.2779], [0.1223, -0.09138, -0.1433], [-0.2611, -0.2948, -0.3188], [-0.3706, -0.3764, -0.3803], [-0.3803, -0.3803, -0.3906]]\n", "[[0.7702, 0.686, 0.5132], [-0.01098, 0.1804, 0.3025], [-0.03619, -0.5132, -0.4941], [-0.5184, -0.326, 0.2354], [0.6018, 0.7182, 0.7702], [0.7702, 0.7702, 0.3545], [0.001568, -0.2023, -0.2403], [-0.1812, 0.001568, 0.2474], [0.3744, 0.4856, 0.4159], [0.2779, -0.04892, -0.2023], [-0.5422, -0.591, -0.7182], [-1.0, -1.0, -1.0]]\n", "[[0.6623, 0.4898, 0.1895], [-0.4496, -0.6018, -0.7182], [-0.7702, -1.0, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-1.0, -0.6623, -0.4775], [-0.2413, -0.08404, 0.2129], [0.3188, 0.4856, 0.6138], [0.7182, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702]]\n", "[[-0.3744, -0.5422, -0.686], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-1.0, -0.3429, 0.192], [0.47, 0.7182, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.5562, 0.05598], [-0.4434, -0.4496, -0.6433]]\n", "[[-0.1507, 0.5184, 0.5562], [0.686, 0.03936, -0.09406], [0.1146, 0.456, 0.7182], [0.7702, 0.6433, 0.3971], [0.3012, 0.2791, 0.3906], [0.6138, 0.7702, 0.7702], [0.7702, 0.7702, 0.7182], [0.6433, 0.5033, 0.4528], [0.4375, 0.4159, 0.3764], [0.3688, 0.3335, 0.335], [0.3366, 0.3091, 0.2744], [0.2733, 0.2643, 0.2474]]\n", "[[0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7182], [0.5639, 0.07415, 0.2722], [-0.2075, -0.5184, -0.3413], [-0.5722, -0.7182, -0.686], [-0.6623, -0.7182, -0.7702], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-0.7702, -0.7702, -0.7702]]\n", "[[0.7702, 0.7702, 0.7702], [0.7182, 0.6018, 0.6433], [0.6623, 0.6433, 0.524], [0.326, -0.2526, -0.7182], [-1.0, -1.0, -1.0], [-0.7182, -0.3633, -0.3971], [-0.3202, -0.2779, -0.2744], [-0.2589, -0.2654, -0.2722], [-0.2791, -0.2779, -0.2495], [-0.2767, -0.2814, -0.2557], [-0.1748, -0.05662, 0.01915], [0.08404, 0.116, 0.1462]]\n", "[[0.7702, 0.7702, 0.5358], [-0.7182, -1.0, -1.0], [-0.6275, -0.1008, 0.1188], [0.3949, 0.2165, 0.0286], [0.3844, 0.5639, 0.6275], [0.7182, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.4496, -0.1522, -0.4775], [-0.1553, 0.1862, 0.3949], [0.4496, 0.4664, 0.5184]]\n", "[[0.4237, 0.4159, -0.1316], [-0.5033, -1.0, -1.0], [-1.0, -1.0, -1.0], [-0.686, -1.0, -0.6623], [-0.5722, -0.4941, -0.3994], [-0.4628, -0.4237, -0.3105], [-0.3971, -0.4185, -0.4404], [-0.47, -0.4086, 0.09541], [0.3012, 0.7702, 0.7702], [0.7702, 0.7702, 0.7182], [0.4263, 0.4856, 0.5422], [0.5033, 0.4628, 0.4594]]\n", "[[0.686, 0.7182, 0.7182], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7182], [0.5639, 0.2156, 0.1576], [0.1345, -0.3335, -0.6138], [-0.5562, -0.5562, -0.5358], [-0.4628, -0.4404, -0.4159], [-0.429, -0.3928, -0.4404], [-0.5132, -0.5722, -0.7182], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0]]\n", "[[-0.549, -0.7182, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-1.0, -0.3823, -0.1962], [0.06827, 0.47, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.5132], [0.2156, 0.2814, 0.2886], [0.271, 0.2515, 0.2165], [0.1359, 0.1591, -0.07284], [-0.02104, 0.005332, 0.04064], [0.06502, 0.1273, -0.03239]]\n", "[[-1.0, -1.0, -0.7702], [-0.2767, 0.07677, 0.4898], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7182], [0.686, 0.5812, 0.5722], [0.5184, 0.524, 0.5184], [0.5297, 0.5297, 0.549], [0.5562, 0.5722, 0.5639], [0.5562, 0.5132, 0.4375], [0.3025, 0.2923, 0.07349], [-0.2174, -0.5081, -1.0]]\n", "[[-1.0, -1.0, -1.0], [-0.7182, -0.6433, -0.5184], [-0.4135, -0.3511, -0.2611], [-0.1507, 0.1553, 0.3413], [0.7702, 0.5184, 0.7182], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.4404, 0.5184, 0.4664], [0.3928, 0.3078, 0.2579], [0.1418, 0.07809, 0.04701]]\n", "[[0.7182, 0.7182, 0.686], [0.6275, 0.04191, -0.3038], [-0.6433, -0.7702, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -0.7702], [-0.6275, -0.5562, -0.4628], [-0.3885, -0.2973, -0.07284], [-0.03683, -0.02482, 0.005959], [0.06049, 0.429, 0.6275], [0.7702, 0.7702, 0.7702]]\n", "[[0.7702, 0.686, 0.6623], [0.2862, -0.1903, -0.5722], [-0.7182, -0.7702, -1.0], [-1.0, -1.0, -1.0], [-1.0, -1.0, -0.7702], [-0.7702, -0.7702, -1.0], [-0.7702, -0.7702, -0.7702], [-0.6275, -0.4496, 0.1223], [0.1912, 0.08537, 0.2936], [0.4016, 0.3844, 0.3928], [0.4404, 0.4628, 0.6433], [0.6018, 0.7182, 0.7702]]\n", "[[-1.0, -1.0, -0.7182], [-0.05148, 0.2948, 0.144], [0.4775, 0.6138, 0.6623], [0.7182, 0.7182, 0.7182], [0.7182, 0.7702, 0.7182], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7182, 0.7182, 0.5722], [0.2886, 0.3132, -0.4628], [-0.7702, -0.429, -0.7182], [-0.6433, -0.5722, -0.7702]]\n", "[[-1.0, -1.0, -1.0], [-0.4898, -0.5358, -0.3949], [-0.1684, -0.01601, -0.05212], [-0.07809, 0.524, 0.549], [0.7182, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.7702, 0.7702, 0.7702], [0.686, 0.5033, 0.5297], [0.5722, 0.6275, 0.6623], [0.5358, 0.4528, 0.2986]]\n" ] } ], "source": [ "for _,col in df.iteritems():\n", " print(list(generate_words(col, 3, 5)))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.5" } }, "nbformat": 4, "nbformat_minor": 2 }