UniTO/anno2/YearI/SecondSem/BDM/progetto/Lab1-1.ipynb

1119 lines
60 KiB
Text
Raw Normal View History

2018-11-22 13:09:11 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"import newsource.libword as libword\n",
"import math\n",
"from decimal import *\n",
"import pandas as pd\n",
"from collections import OrderedDict as odict\n",
"\n",
"datadir = \"./3classdata\"\n",
"window = 3\n",
"resolution = 50\n",
"shift = 3"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<module 'newsource.libword' from '/_old/UNITO/lezioni/YearI/SecondSem/BDM/progetto/newsource/libword.py'>"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import importlib\n",
"importlib.reload(libword)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# read files from given directory\n",
"def readDatadir(datadir):\n",
"    \"\"\"Load every <datadir>/<group>/<number>.csv file.\n",
"\n",
"    Returns an ordered dict mapping (group directory name, int(file stem))\n",
"    to the value returned by libword.read_csv for that file, in glob order.\n",
"    Raises ValueError when a file stem is not an integer.\n",
"    \"\"\"\n",
"    import glob\n",
"    import os\n",
"\n",
"    frames = odict()\n",
"    for dfile in glob.glob(os.path.join(datadir, \"*\", \"*.csv\")):\n",
"        # Build the key from real path components. str.strip() removes a\n",
"        # *set of characters*, not a prefix/suffix, so the old\n",
"        # strip(datadir + '/') / strip('.csv') calls would also eat leading\n",
"        # letters of the group name (e.g. 'sitting' -> 'itting').\n",
"        cord = os.path.basename(os.path.dirname(dfile))\n",
"        numb = os.path.splitext(os.path.basename(dfile))[0]\n",
"        frames[(cord, int(numb))] = libword.read_csv(dfile)\n",
"    return frames"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"372"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"originalDfs = readDatadir(datadir)\n",
"len(originalDfs)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"#list(originalDfs.keys())"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# normalize every loaded dataframe to the range [-1, 1]\n",
"normalizeDfs = odict()\n",
"for k, df in originalDfs.items():\n",
" normalizeDfs[k] = libword.normalize(df)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# discretize on a gaussian interval of 2*resolution steps\n",
"# (no empty-dict pre-initialisation: if the parallel run fails or is interrupted,\n",
"# discretizeDfs must not be left looking like a valid, empty result)\n",
"interval = libword.gaussian_interval(resolution)\n",
"discretizeDfs = libword.parallel(target=libword.discretize, iterable=normalizeDfs,\n",
"                                 interval=interval, how=\"left\", n=12)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"#print(interval)\n",
"#discretizeDfs[0]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# extract words from each discretized dataframe with a rolling window\n",
"# (window length = window, step = shift)\n",
"wordDfs = libword.parallel(target=libword.rolling_window, iterable=discretizeDfs,\n",
" w=window, s=shift, n=12)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Process ForkPoolWorker-28:\n",
"Process ForkPoolWorker-29:\n",
"Process ForkPoolWorker-36:\n",
"Process ForkPoolWorker-26:\n",
"Process ForkPoolWorker-30:\n",
"Process ForkPoolWorker-31:\n",
"Process ForkPoolWorker-25:\n",
"Process ForkPoolWorker-35:\n",
"Process ForkPoolWorker-27:\n",
"Process ForkPoolWorker-32:\n",
"Process ForkPoolWorker-33:\n",
"Traceback (most recent call last):\n",
"Process ForkPoolWorker-34:\n",
"Traceback (most recent call last):\n",
"Traceback (most recent call last):\n",
"Traceback (most recent call last):\n",
"Traceback (most recent call last):\n",
"Traceback (most recent call last):\n",
"Traceback (most recent call last):\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
"Traceback (most recent call last):\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
"KeyboardInterrupt\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
"KeyboardInterrupt\n",
"KeyboardInterrupt\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
"KeyboardInterrupt\n",
"KeyboardInterrupt\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 335, in get\n",
" res = self._reader.recv_bytes()\n",
"KeyboardInterrupt\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
"KeyboardInterrupt\n",
"KeyboardInterrupt\n",
" File \"/usr/lib64/python3.5/multiprocessing/connection.py\", line 216, in recv_bytes\n",
" buf = self._recv_bytes(maxlength)\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
"KeyboardInterrupt\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/connection.py\", line 407, in _recv_bytes\n",
" buf = self._recv(4)\n",
"KeyboardInterrupt\n",
"KeyboardInterrupt\n",
" File \"/usr/lib64/python3.5/multiprocessing/connection.py\", line 379, in _recv\n",
" chunk = read(handle, remaining)\n",
"KeyboardInterrupt\n"
]
}
],
"source": [
"# TF: computed on each single column (univariate series) of every dataframe;\n",
"# each series is associated with its 3 most frequent words (ranked by TF)\n",
"tf_Dfs = libword.parallel(target=libword.extract_words, iterable=wordDfs, \n",
" how=libword.text_freq, n=12)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"#tf_Dfs[0]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>10</th>\n",
" <th>11</th>\n",
" <th>12</th>\n",
" <th>13</th>\n",
" <th>14</th>\n",
" <th>15</th>\n",
" <th>16</th>\n",
" <th>17</th>\n",
" <th>18</th>\n",
" <th>19</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -0.46898)</td>\n",
" <td>(0.36759, 0.43642, 0.51224)</td>\n",
" <td>(0.46898, 0.19116, -0.10077)</td>\n",
" <td>(0.58036, 0.31892, -0.01583)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.06726, 0.06726, 0.51224)</td>\n",
" <td>(-0.14360, -0.13620, 0.15881)</td>\n",
" <td>(0.22710, 0.20859, 0.01583)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.33375, 0.29221)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.51224, 0.51224, 0.58036)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.46898, 0.15881, 0.46898)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-0.16663, 0.24694, 0.05422)</td>\n",
" <td>(0.43642, 0.30513, 0.40993)</td>\n",
" <td>(-0.33375, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -0.19116, -0.12174)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-0.04776, -0.21769, -0.19975)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.38737)</td>\n",
" <td>(-1.00000, -0.58036, -0.26847)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-0.14360, -0.16663, -0.10077)</td>\n",
" <td>(0.51224, 0.38737, 0.28002)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>(0.51224, 0.33375, 0.04133)</td>\n",
" <td>(0.26847, 0.05422, -0.10077)</td>\n",
" <td>(0.58036, 0.40993, 0.18279)</td>\n",
" <td>(0.51224, 0.46898, 0.31892)</td>\n",
" <td>(-0.13620, -0.29221, 0.06726)</td>\n",
" <td>(0.25747, 0.13620, 0.06726)</td>\n",
" <td>(-1.00000, -1.00000, -0.58036)</td>\n",
" <td>(0.43642, 0.46898, 0.46898)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-0.34987, -0.40993, -0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.03492, -0.40993, -1.00000)</td>\n",
" <td>(-0.16663, 0.05422, 0.25747)</td>\n",
" <td>(0.03492, -0.31892, -1.00000)</td>\n",
" <td>(-0.58036, -1.00000, -1.00000)</td>\n",
" <td>(-0.08049, 0.06072, -0.36759)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>(-0.02218, -0.24694, -0.43642)</td>\n",
" <td>(-0.51224, -0.43642, -0.43642)</td>\n",
" <td>(0.04776, -0.10077, -0.23683)</td>\n",
" <td>(-0.00950, -0.02855, -0.29221)</td>\n",
" <td>(0.51224, 0.46898, 0.46898)</td>\n",
" <td>(-0.10767, -0.31892, -0.46898)</td>\n",
" <td>(-0.33375, -0.10767, -0.02855)</td>\n",
" <td>(0.10767, -0.58036, -1.00000)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.51224, 0.29221, -0.19975)</td>\n",
" <td>(0.51224, 0.17462, -0.30513)</td>\n",
" <td>(-0.58036, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.58036, 0.51224, 0.51224)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.46898, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-0.58036, -1.00000, -1.00000)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>(-0.58036, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-0.43642, -0.36759, -0.33375)</td>\n",
" <td>(-0.38737, -1.00000, -1.00000)</td>\n",
" <td>(0.30513, -0.22710, -0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-0.00316, 0.02218, 0.04133)</td>\n",
" <td>(-0.14360, 0.16663, 0.30513)</td>\n",
" <td>(0.08718, 0.28002, -0.19116)</td>\n",
" <td>(-0.14360, -0.25747, -0.46898)</td>\n",
" <td>(-0.02218, 0.20859, 0.15881)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -0.58036)</td>\n",
" <td>(0.17462, 0.00950, -0.20859)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.04776, -0.02855, -0.36759)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.04133, 0.05422, -0.04776)</td>\n",
" <td>(0.33375, 0.30513, 0.18279)</td>\n",
" <td>(-0.51224, -0.31892, -0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.04776, -0.14360, -0.38737)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-0.21769, -0.16663, -0.13620)</td>\n",
" <td>(-0.58036, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -0.58036, -0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-0.58036, -1.00000, -1.00000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -0.58036, -0.51224)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -0.43642, -0.08049)</td>\n",
" <td>(-0.38737, 0.05422, 0.58036)</td>\n",
" <td>(-0.22710, -0.46898, -0.51224)</td>\n",
" <td>(-0.11466, -0.13620, -0.08049)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.31892, 0.58036, 0.51224)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-0.51224, -0.51224, -0.40993)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.11466, 0.16663, 0.12174)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-0.43642, -0.31892, 0.00316)</td>\n",
" <td>(-0.14360, 0.10767, 0.36759)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.51224, 0.51224, 0.43642)</td>\n",
" <td>(-1.00000, -1.00000, -0.46898)</td>\n",
" <td>(-0.40993, -0.38737, -0.38737)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-0.58036, -0.58036, -0.40993)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -0.46898, -0.34987)</td>\n",
" <td>(-0.03492, -0.25747, -0.36759)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.36759, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, 0.12174)</td>\n",
" <td>(0.43642, 0.40993, 0.40993)</td>\n",
" <td>(-0.51224, -0.43642, -0.26847)</td>\n",
" <td>(-0.34987, -0.38737, -0.40993)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-0.58036, -0.46898, -0.40993)</td>\n",
" <td>(-0.19975, 0.43642, 0.51224)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>(-1.00000, -0.33375, -0.17462)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-0.19975, -0.16663, -0.06726)</td>\n",
" <td>(-0.46898, -0.40993, -0.24694)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -0.51224)</td>\n",
" <td>(-1.00000, -1.00000, -0.36759)</td>\n",
" <td>(0.58036, 0.58036, 0.46898)</td>\n",
" <td>(0.38737, 0.43642, 0.46898)</td>\n",
" <td>(-0.13620, 0.05422, 0.29221)</td>\n",
" <td>(-0.43642, -0.46898, -0.43642)</td>\n",
" <td>(0.58036, 0.58036, 0.13620)</td>\n",
" <td>(-0.30513, -0.08049, -0.04133)</td>\n",
" <td>(0.40993, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -0.46898, -0.16663)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>(0.26847, 0.33375, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-0.03492, -0.00950, 0.02218)</td>\n",
" <td>(0.08049, 0.22710, 0.51224)</td>\n",
" <td>(0.58036, 0.58036, 0.51224)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-0.10077, 0.09394, 0.03492)</td>\n",
" <td>(0.43642, 0.51224, 0.51224)</td>\n",
" <td>(-1.00000, 0.12174, 0.58036)</td>\n",
" <td>(0.51224, 0.58036, 0.58036)</td>\n",
" <td>(0.38737, 0.58036, 0.51224)</td>\n",
" <td>(-0.40993, -0.31892, -0.29221)</td>\n",
" <td>(0.21769, 0.15114, 0.06072)</td>\n",
" <td>(-0.02855, -0.00316, 0.05422)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-0.18279, 0.51224, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-0.58036, -1.00000, -0.30513)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.02855, 0.02855, 0.02855)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.51224, 0.43642, 0.40993)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-0.19975, -0.31892, -0.46898)</td>\n",
" <td>(0.40993, 0.34987, 0.29221)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.46898, 0.43642, 0.38737)</td>\n",
" <td>(-0.29221, -0.12892, 0.06726)</td>\n",
" <td>(-0.05422, -0.10767, -0.24694)</td>\n",
" <td>(0.40993, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.51224, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-0.33375, -0.28002, -0.22710)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(0.02218, 0.02855, 0.02855)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.34987, 0.29221, 0.23683)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" <td>(-0.58036, -1.00000, -0.19975)</td>\n",
" <td>(0.31892, 0.30513, 0.40993)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.30513, 0.51224, 0.58036)</td>\n",
" <td>(0.28002, 0.58036, 0.46898)</td>\n",
" <td>(-0.31892, -0.33375, -0.36759)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(0.58036, 0.58036, 0.58036)</td>\n",
" <td>(-1.00000, -1.00000, -1.00000)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 \\\n",
"0 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n",
"1 (0.58036, 0.58036, 0.58036) (0.46898, 0.15881, 0.46898) \n",
"2 (0.51224, 0.33375, 0.04133) (0.26847, 0.05422, -0.10077) \n",
"3 (-0.02218, -0.24694, -0.43642) (-0.51224, -0.43642, -0.43642) \n",
"4 (-0.58036, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"5 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"6 (-1.00000, -1.00000, -1.00000) (-1.00000, -0.58036, -0.51224) \n",
"7 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"8 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"9 (-1.00000, -0.33375, -0.17462) (-1.00000, -1.00000, -1.00000) \n",
"10 (0.26847, 0.33375, 0.58036) (-1.00000, -1.00000, -1.00000) \n",
"11 (0.58036, 0.58036, 0.58036) (-0.58036, -1.00000, -0.30513) \n",
"12 (0.58036, 0.58036, 0.58036) (-0.33375, -0.28002, -0.22710) \n",
"\n",
" 2 3 \\\n",
"0 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n",
"1 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n",
"2 (0.58036, 0.40993, 0.18279) (0.51224, 0.46898, 0.31892) \n",
"3 (0.04776, -0.10077, -0.23683) (-0.00950, -0.02855, -0.29221) \n",
"4 (-0.43642, -0.36759, -0.33375) (-0.38737, -1.00000, -1.00000) \n",
"5 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"6 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"7 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"8 (-1.00000, -1.00000, -1.00000) (-1.00000, -0.46898, -0.34987) \n",
"9 (-1.00000, -1.00000, -1.00000) (-0.19975, -0.16663, -0.06726) \n",
"10 (-1.00000, -1.00000, -1.00000) (-0.03492, -0.00950, 0.02218) \n",
"11 (-1.00000, -1.00000, -1.00000) (0.02855, 0.02855, 0.02855) \n",
"12 (-1.00000, -1.00000, -1.00000) (0.02218, 0.02855, 0.02855) \n",
"\n",
" 4 5 \\\n",
"0 (-1.00000, -1.00000, -0.46898) (0.36759, 0.43642, 0.51224) \n",
"1 (-0.16663, 0.24694, 0.05422) (0.43642, 0.30513, 0.40993) \n",
"2 (-0.13620, -0.29221, 0.06726) (0.25747, 0.13620, 0.06726) \n",
"3 (0.51224, 0.46898, 0.46898) (-0.10767, -0.31892, -0.46898) \n",
"4 (0.30513, -0.22710, -0.58036) (-1.00000, -1.00000, -1.00000) \n",
"5 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"6 (-1.00000, -0.43642, -0.08049) (-0.38737, 0.05422, 0.58036) \n",
"7 (0.11466, 0.16663, 0.12174) (0.58036, 0.58036, 0.58036) \n",
"8 (-0.03492, -0.25747, -0.36759) (0.58036, 0.58036, 0.58036) \n",
"9 (-0.46898, -0.40993, -0.24694) (0.58036, 0.58036, 0.58036) \n",
"10 (0.08049, 0.22710, 0.51224) (0.58036, 0.58036, 0.51224) \n",
"11 (0.58036, 0.58036, 0.58036) (0.51224, 0.43642, 0.40993) \n",
"12 (0.58036, 0.58036, 0.58036) (0.34987, 0.29221, 0.23683) \n",
"\n",
" 6 7 \\\n",
"0 (0.46898, 0.19116, -0.10077) (0.58036, 0.31892, -0.01583) \n",
"1 (-0.33375, -1.00000, -1.00000) (-1.00000, -0.19116, -0.12174) \n",
"2 (-1.00000, -1.00000, -0.58036) (0.43642, 0.46898, 0.46898) \n",
"3 (-0.33375, -0.10767, -0.02855) (0.10767, -0.58036, -1.00000) \n",
"4 (-0.00316, 0.02218, 0.04133) (-0.14360, 0.16663, 0.30513) \n",
"5 (0.04133, 0.05422, -0.04776) (0.33375, 0.30513, 0.18279) \n",
"6 (-0.22710, -0.46898, -0.51224) (-0.11466, -0.13620, -0.08049) \n",
"7 (-0.43642, -0.31892, 0.00316) (-0.14360, 0.10767, 0.36759) \n",
"8 (0.36759, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n",
"9 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n",
"10 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n",
"11 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n",
"12 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n",
"\n",
" 8 9 \\\n",
"0 (0.58036, 0.58036, 0.58036) (0.06726, 0.06726, 0.51224) \n",
"1 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n",
"2 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n",
"3 (0.58036, 0.58036, 0.58036) (0.51224, 0.29221, -0.19975) \n",
"4 (0.08718, 0.28002, -0.19116) (-0.14360, -0.25747, -0.46898) \n",
"5 (-0.51224, -0.31892, -0.58036) (-1.00000, -1.00000, -1.00000) \n",
"6 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"7 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"8 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"9 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -0.51224) \n",
"10 (-1.00000, -1.00000, -1.00000) (-0.10077, 0.09394, 0.03492) \n",
"11 (-1.00000, -1.00000, -1.00000) (-0.19975, -0.31892, -0.46898) \n",
"12 (-1.00000, -1.00000, -1.00000) (-0.58036, -1.00000, -0.19975) \n",
"\n",
" 10 11 \\\n",
"0 (-0.14360, -0.13620, 0.15881) (0.22710, 0.20859, 0.01583) \n",
"1 (0.58036, 0.58036, 0.58036) (-0.04776, -0.21769, -0.19975) \n",
"2 (0.58036, 0.58036, 0.58036) (-0.34987, -0.40993, -0.58036) \n",
"3 (0.51224, 0.17462, -0.30513) (-0.58036, -1.00000, -1.00000) \n",
"4 (-0.02218, 0.20859, 0.15881) (-1.00000, -1.00000, -1.00000) \n",
"5 (0.04776, -0.14360, -0.38737) (-1.00000, -1.00000, -1.00000) \n",
"6 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"7 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"8 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, 0.12174) \n",
"9 (-1.00000, -1.00000, -0.36759) (0.58036, 0.58036, 0.46898) \n",
"10 (0.43642, 0.51224, 0.51224) (-1.00000, 0.12174, 0.58036) \n",
"11 (0.40993, 0.34987, 0.29221) (0.58036, 0.58036, 0.58036) \n",
"12 (0.31892, 0.30513, 0.40993) (0.58036, 0.58036, 0.58036) \n",
"\n",
" 12 13 \\\n",
"0 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n",
"1 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n",
"2 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n",
"3 (-1.00000, -1.00000, -1.00000) (0.58036, 0.51224, 0.51224) \n",
"4 (-1.00000, -1.00000, -0.58036) (0.17462, 0.00950, -0.20859) \n",
"5 (-0.21769, -0.16663, -0.13620) (-0.58036, -1.00000, -1.00000) \n",
"6 (0.31892, 0.58036, 0.51224) (-1.00000, -1.00000, -1.00000) \n",
"7 (0.51224, 0.51224, 0.43642) (-1.00000, -1.00000, -0.46898) \n",
"8 (0.43642, 0.40993, 0.40993) (-0.51224, -0.43642, -0.26847) \n",
"9 (0.38737, 0.43642, 0.46898) (-0.13620, 0.05422, 0.29221) \n",
"10 (0.51224, 0.58036, 0.58036) (0.38737, 0.58036, 0.51224) \n",
"11 (0.58036, 0.58036, 0.58036) (0.46898, 0.43642, 0.38737) \n",
"12 (0.58036, 0.58036, 0.58036) (0.30513, 0.51224, 0.58036) \n",
"\n",
" 14 15 \\\n",
"0 (0.58036, 0.58036, 0.58036) (-1.00000, -1.00000, -1.00000) \n",
"1 (0.58036, 0.58036, 0.38737) (-1.00000, -0.58036, -0.26847) \n",
"2 (0.03492, -0.40993, -1.00000) (-0.16663, 0.05422, 0.25747) \n",
"3 (-1.00000, -1.00000, -1.00000) (0.46898, 0.58036, 0.58036) \n",
"4 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n",
"5 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n",
"6 (-0.51224, -0.51224, -0.40993) (0.58036, 0.58036, 0.58036) \n",
"7 (-0.40993, -0.38737, -0.38737) (0.58036, 0.58036, 0.58036) \n",
"8 (-0.34987, -0.38737, -0.40993) (0.58036, 0.58036, 0.58036) \n",
"9 (-0.43642, -0.46898, -0.43642) (0.58036, 0.58036, 0.13620) \n",
"10 (-0.40993, -0.31892, -0.29221) (0.21769, 0.15114, 0.06072) \n",
"11 (-0.29221, -0.12892, 0.06726) (-0.05422, -0.10767, -0.24694) \n",
"12 (0.28002, 0.58036, 0.46898) (-0.31892, -0.33375, -0.36759) \n",
"\n",
" 16 17 \\\n",
"0 (0.58036, 0.58036, 0.58036) (0.58036, 0.33375, 0.29221) \n",
"1 (0.58036, 0.58036, 0.58036) (-0.14360, -0.16663, -0.10077) \n",
"2 (0.03492, -0.31892, -1.00000) (-0.58036, -1.00000, -1.00000) \n",
"3 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"4 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"5 (-1.00000, -1.00000, -1.00000) (-1.00000, -0.58036, -0.58036) \n",
"6 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"7 (-1.00000, -1.00000, -1.00000) (-0.58036, -0.58036, -0.40993) \n",
"8 (-0.58036, -0.46898, -0.40993) (-0.19975, 0.43642, 0.51224) \n",
"9 (-0.30513, -0.08049, -0.04133) (0.40993, 0.58036, 0.58036) \n",
"10 (-0.02855, -0.00316, 0.05422) (0.58036, 0.58036, 0.58036) \n",
"11 (0.40993, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n",
"12 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n",
"\n",
" 18 19 \n",
"0 (0.58036, 0.58036, 0.58036) (0.51224, 0.51224, 0.58036) \n",
"1 (0.51224, 0.38737, 0.28002) (0.58036, 0.58036, 0.58036) \n",
"2 (-0.08049, 0.06072, -0.36759) (0.58036, 0.58036, 0.58036) \n",
"3 (-0.58036, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n",
"4 (-1.00000, -1.00000, -1.00000) (0.04776, -0.02855, -0.36759) \n",
"5 (-1.00000, -1.00000, -1.00000) (-0.58036, -1.00000, -1.00000) \n",
"6 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"7 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"8 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n",
"9 (-1.00000, -0.46898, -0.16663) (-1.00000, -1.00000, -1.00000) \n",
"10 (-0.18279, 0.51224, 0.58036) (-1.00000, -1.00000, -1.00000) \n",
"11 (0.51224, 0.58036, 0.58036) (-1.00000, -1.00000, -1.00000) \n",
"12 (0.58036, 0.58036, 0.58036) (-1.00000, -1.00000, -1.00000) "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wordDfs[('X', 1)]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Process ForkPoolWorker-42:\n",
"Process ForkPoolWorker-43:\n",
"Process ForkPoolWorker-38:\n",
"Process ForkPoolWorker-44:\n",
"Process ForkPoolWorker-46:\n",
"Traceback (most recent call last):\n",
"Process ForkPoolWorker-41:\n",
"Process ForkPoolWorker-40:\n",
"Process ForkPoolWorker-45:\n",
"Process ForkPoolWorker-39:\n",
"Process ForkPoolWorker-37:\n",
"Process ForkPoolWorker-48:\n",
"Process ForkPoolWorker-47:\n",
"Traceback (most recent call last):\n",
"Traceback (most recent call last):\n",
"Traceback (most recent call last):\n",
"Traceback (most recent call last):\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
"Traceback (most recent call last):\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n",
" self.run()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
"KeyboardInterrupt\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n",
" task = get()\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 335, in get\n",
" res = self._reader.recv_bytes()\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/connection.py\", line 216, in recv_bytes\n",
" buf = self._recv_bytes(maxlength)\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n",
" with self._rlock:\n",
"KeyboardInterrupt\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
"KeyboardInterrupt\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
"KeyboardInterrupt\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
" File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n",
" return self._semlock.__enter__()\n",
"KeyboardInterrupt\n",
"KeyboardInterrupt\n",
" File \"/usr/lib64/python3.5/multiprocessing/connection.py\", line 407, in _recv_bytes\n",
" buf = self._recv(4)\n",
"KeyboardInterrupt\n",
"KeyboardInterrupt\n",
"KeyboardInterrupt\n",
"KeyboardInterrupt\n",
" File \"/usr/lib64/python3.5/multiprocessing/connection.py\", line 379, in _recv\n",
" chunk = read(handle, remaining)\n",
"KeyboardInterrupt\n",
"KeyboardInterrupt\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-13-845d2ee8dc13>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtf_idfs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlibword\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtfidf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwordDfs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnwords\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/_old/UNITO/lezioni/YearI/SecondSem/BDM/progetto/newsource/libword.py\u001b[0m in \u001b[0;36mtfidf\u001b[0;34m(odct, nwords)\u001b[0m\n\u001b[1;32m 155\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mlst\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 156\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcol\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miteritems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 157\u001b[0;31m \u001b[0mtfs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext_freq\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# associate every word with tf for each sensor\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 158\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0mtf_idfs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/_old/UNITO/lezioni/YearI/SecondSem/BDM/progetto/newsource/libword.py\u001b[0m in \u001b[0;36mtext_freq\u001b[0;34m(col)\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtext_freq\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[0md\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 101\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mword\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcnt\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miteritems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 102\u001b[0m \u001b[0mhashable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mword\u001b[0m \u001b[0;31m# hashable to be used as index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhashable\u001b[0m \u001b[0;32min\u001b[0m \u001b[0md\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib64/python3.5/site-packages/pandas/core/base.py\u001b[0m in \u001b[0;36mvalue_counts\u001b[0;34m(self, normalize, sort, ascending, bins, dropna)\u001b[0m\n\u001b[1;32m 869\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0malgorithms\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mvalue_counts\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 870\u001b[0m result = value_counts(self, sort=sort, ascending=ascending,\n\u001b[0;32m--> 871\u001b[0;31m normalize=normalize, bins=bins, dropna=dropna)\n\u001b[0m\u001b[1;32m 872\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 873\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib64/python3.5/site-packages/pandas/core/algorithms.py\u001b[0m in \u001b[0;36mvalue_counts\u001b[0;34m(values, sort, ascending, normalize, bins, dropna)\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 558\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 559\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mascending\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mascending\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 560\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnormalize\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib64/python3.5/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36msort_values\u001b[0;34m(self, axis, ascending, inplace, kind, na_position)\u001b[0m\n\u001b[1;32m 1918\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mna_position\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'last'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1919\u001b[0m \u001b[0mn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgood\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1920\u001b[0;31m \u001b[0msortedIdx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mgood\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0margsorted\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1921\u001b[0m \u001b[0msortedIdx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbad\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1922\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mna_position\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'first'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib64/python3.5/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m__array__\u001b[0;34m(self, dtype)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 563\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0m__array__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 564\u001b[0m \u001b[0;34m\"\"\" the array interface, return my values \"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 565\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"tf_idfs = libword.tfidf(wordDfs, nwords=3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"tf_idfs[('X', 1)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"g1 = odict((k,v) for k,v in wordDfs.items() if k[1] <= 31 )\n",
"g2 = odict((k,v) for k,v in wordDfs.items() if k[1] > 31 and k[1] < 280 )\n",
"g3 = odict((k,v) for k,v in wordDfs.items() if k[1] > 280 )\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"g1_tfidfs = libword.tfidf(g1, nwords=3)\n",
"g2_tfidfs = libword.tfidf(g2, nwords=3)\n",
"g3_tfidfs = libword.tfidf(g3, nwords=3)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"g1_tfidfs[('X', 1)]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"with open('wordDfs.pickle', 'wb') as fp:\n",
" pickle.dump(wordDfs, fp)\n",
"with open('discretizeDfs.pickle', 'wb') as fp:\n",
" pickle.dump(discretizeDfs, fp)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}