{ "cells": [ { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "import newsource.libword as libword\n", "import math\n", "from decimal import *\n", "import pandas as pd\n", "from collections import OrderedDict as odict\n", "\n", "datadir = \"./3classdata\"\n", "window = 3\n", "resolution = 50\n", "shift = 3" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import importlib\n", "importlib.reload(libword)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# read files from given directory\n", "def readDatadir(datadir):\n", " import glob\n", " frames = odict()\n", " for dfile in glob.glob(datadir+\"/*/*.csv\"):\n", " tmp = libword.read_csv(dfile)\n", " cord, numb = dfile.strip(datadir+'/').strip('.csv').split('/')\n", " frames[(cord, int(numb))] = tmp\n", " return frames" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "372" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "originalDfs = readDatadir(datadir)\n", "len(originalDfs)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "scrolled": true }, "outputs": [], "source": [ "#list(originalDfs.keys())" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# normalize between [-1,1]\n", "normalizeDfs = odict()\n", "for k, df in originalDfs.items():\n", " normalizeDfs[k] = libword.normalize(df)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# discretize on a gaussian interval of 2*resolution steps\n", "discretizeDfs = dict()\n", "interval = libword.gaussian_interval(resolution)\n", "discretizeDfs = libword.parallel(target=libword.discretize, iterable=normalizeDfs, \n", " interval=interval, how=\"left\", n=12)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [], "source": [ "#print(interval)\n", "#discretizeDfs[0]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# extract words from dataset (from window,shift given)\n", "wordDfs = libword.parallel(target=libword.rolling_window, iterable=discretizeDfs,\n", " w=window, s=shift, n=12)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Process ForkPoolWorker-28:\n", "Process ForkPoolWorker-29:\n", "Process ForkPoolWorker-36:\n", "Process ForkPoolWorker-26:\n", "Process ForkPoolWorker-30:\n", "Process ForkPoolWorker-31:\n", "Process ForkPoolWorker-25:\n", "Process ForkPoolWorker-35:\n", "Process ForkPoolWorker-27:\n", "Process ForkPoolWorker-32:\n", "Process ForkPoolWorker-33:\n", "Traceback (most recent call last):\n", "Process ForkPoolWorker-34:\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", "Traceback (most recent call last):\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", "Traceback (most recent call last):\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", "KeyboardInterrupt\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", "KeyboardInterrupt\n", "KeyboardInterrupt\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", "KeyboardInterrupt\n", "KeyboardInterrupt\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 335, in get\n", " res = self._reader.recv_bytes()\n", "KeyboardInterrupt\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", "KeyboardInterrupt\n", "KeyboardInterrupt\n", " File \"/usr/lib64/python3.5/multiprocessing/connection.py\", line 216, in recv_bytes\n", " buf = self._recv_bytes(maxlength)\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", "KeyboardInterrupt\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/connection.py\", line 407, in _recv_bytes\n", " buf = self._recv(4)\n", "KeyboardInterrupt\n", "KeyboardInterrupt\n", " File \"/usr/lib64/python3.5/multiprocessing/connection.py\", line 379, in _recv\n", " chunk = read(handle, remaining)\n", "KeyboardInterrupt\n" ] } ], "source": [ "# TF: on single columns (monovariate series) of each dataframe\n", "# associate each series with its 3 most frequent words (according to TF)\n", "tf_Dfs = libword.parallel(target=libword.extract_words, iterable=wordDfs, \n", " how=libword.text_freq, n=12)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "#tf_Dfs[0]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012345678910111213141516171819
0(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -0.46898)(0.36759, 0.43642, 0.51224)(0.46898, 0.19116, -0.10077)(0.58036, 0.31892, -0.01583)(0.58036, 0.58036, 0.58036)(0.06726, 0.06726, 0.51224)(-0.14360, -0.13620, 0.15881)(0.22710, 0.20859, 0.01583)(-1.00000, -1.00000, -1.00000)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(0.58036, 0.58036, 0.58036)(0.58036, 0.33375, 0.29221)(0.58036, 0.58036, 0.58036)(0.51224, 0.51224, 0.58036)
1(0.58036, 0.58036, 0.58036)(0.46898, 0.15881, 0.46898)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-0.16663, 0.24694, 0.05422)(0.43642, 0.30513, 0.40993)(-0.33375, -1.00000, -1.00000)(-1.00000, -0.19116, -0.12174)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-0.04776, -0.21769, -0.19975)(-1.00000, -1.00000, -1.00000)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.38737)(-1.00000, -0.58036, -0.26847)(0.58036, 0.58036, 0.58036)(-0.14360, -0.16663, -0.10077)(0.51224, 0.38737, 0.28002)(0.58036, 0.58036, 0.58036)
2(0.51224, 0.33375, 0.04133)(0.26847, 0.05422, -0.10077)(0.58036, 0.40993, 0.18279)(0.51224, 0.46898, 0.31892)(-0.13620, -0.29221, 0.06726)(0.25747, 0.13620, 0.06726)(-1.00000, -1.00000, -0.58036)(0.43642, 0.46898, 0.46898)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-0.34987, -0.40993, -0.58036)(-1.00000, -1.00000, -1.00000)(0.58036, 0.58036, 0.58036)(0.03492, -0.40993, -1.00000)(-0.16663, 0.05422, 0.25747)(0.03492, -0.31892, -1.00000)(-0.58036, -1.00000, -1.00000)(-0.08049, 0.06072, -0.36759)(0.58036, 0.58036, 0.58036)
3(-0.02218, -0.24694, -0.43642)(-0.51224, -0.43642, -0.43642)(0.04776, -0.10077, -0.23683)(-0.00950, -0.02855, -0.29221)(0.51224, 0.46898, 0.46898)(-0.10767, -0.31892, -0.46898)(-0.33375, -0.10767, -0.02855)(0.10767, -0.58036, -1.00000)(0.58036, 0.58036, 0.58036)(0.51224, 0.29221, -0.19975)(0.51224, 0.17462, -0.30513)(-0.58036, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(0.58036, 0.51224, 0.51224)(-1.00000, -1.00000, -1.00000)(0.46898, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-0.58036, -1.00000, -1.00000)(0.58036, 0.58036, 0.58036)
4(-0.58036, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-0.43642, -0.36759, -0.33375)(-0.38737, -1.00000, -1.00000)(0.30513, -0.22710, -0.58036)(-1.00000, -1.00000, -1.00000)(-0.00316, 0.02218, 0.04133)(-0.14360, 0.16663, 0.30513)(0.08718, 0.28002, -0.19116)(-0.14360, -0.25747, -0.46898)(-0.02218, 0.20859, 0.15881)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -0.58036)(0.17462, 0.00950, -0.20859)(-1.00000, -1.00000, -1.00000)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(0.04776, -0.02855, -0.36759)
5(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(0.04133, 0.05422, -0.04776)(0.33375, 0.30513, 0.18279)(-0.51224, -0.31892, -0.58036)(-1.00000, -1.00000, -1.00000)(0.04776, -0.14360, -0.38737)(-1.00000, -1.00000, -1.00000)(-0.21769, -0.16663, -0.13620)(-0.58036, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-1.00000, -0.58036, -0.58036)(-1.00000, -1.00000, -1.00000)(-0.58036, -1.00000, -1.00000)
6(-1.00000, -1.00000, -1.00000)(-1.00000, -0.58036, -0.51224)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -0.43642, -0.08049)(-0.38737, 0.05422, 0.58036)(-0.22710, -0.46898, -0.51224)(-0.11466, -0.13620, -0.08049)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(0.31892, 0.58036, 0.51224)(-1.00000, -1.00000, -1.00000)(-0.51224, -0.51224, -0.40993)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)
7(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(0.11466, 0.16663, 0.12174)(0.58036, 0.58036, 0.58036)(-0.43642, -0.31892, 0.00316)(-0.14360, 0.10767, 0.36759)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(0.51224, 0.51224, 0.43642)(-1.00000, -1.00000, -0.46898)(-0.40993, -0.38737, -0.38737)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-0.58036, -0.58036, -0.40993)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)
8(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -0.46898, -0.34987)(-0.03492, -0.25747, -0.36759)(0.58036, 0.58036, 0.58036)(0.36759, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, 0.12174)(0.43642, 0.40993, 0.40993)(-0.51224, -0.43642, -0.26847)(-0.34987, -0.38737, -0.40993)(0.58036, 0.58036, 0.58036)(-0.58036, -0.46898, -0.40993)(-0.19975, 0.43642, 0.51224)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)
9(-1.00000, -0.33375, -0.17462)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-0.19975, -0.16663, -0.06726)(-0.46898, -0.40993, -0.24694)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -0.51224)(-1.00000, -1.00000, -0.36759)(0.58036, 0.58036, 0.46898)(0.38737, 0.43642, 0.46898)(-0.13620, 0.05422, 0.29221)(-0.43642, -0.46898, -0.43642)(0.58036, 0.58036, 0.13620)(-0.30513, -0.08049, -0.04133)(0.40993, 0.58036, 0.58036)(-1.00000, -0.46898, -0.16663)(-1.00000, -1.00000, -1.00000)
10(0.26847, 0.33375, 0.58036)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-0.03492, -0.00950, 0.02218)(0.08049, 0.22710, 0.51224)(0.58036, 0.58036, 0.51224)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-0.10077, 0.09394, 0.03492)(0.43642, 0.51224, 0.51224)(-1.00000, 0.12174, 0.58036)(0.51224, 0.58036, 0.58036)(0.38737, 0.58036, 0.51224)(-0.40993, -0.31892, -0.29221)(0.21769, 0.15114, 0.06072)(-0.02855, -0.00316, 0.05422)(0.58036, 0.58036, 0.58036)(-0.18279, 0.51224, 0.58036)(-1.00000, -1.00000, -1.00000)
11(0.58036, 0.58036, 0.58036)(-0.58036, -1.00000, -0.30513)(-1.00000, -1.00000, -1.00000)(0.02855, 0.02855, 0.02855)(0.58036, 0.58036, 0.58036)(0.51224, 0.43642, 0.40993)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-0.19975, -0.31892, -0.46898)(0.40993, 0.34987, 0.29221)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.46898, 0.43642, 0.38737)(-0.29221, -0.12892, 0.06726)(-0.05422, -0.10767, -0.24694)(0.40993, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.51224, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)
12(0.58036, 0.58036, 0.58036)(-0.33375, -0.28002, -0.22710)(-1.00000, -1.00000, -1.00000)(0.02218, 0.02855, 0.02855)(0.58036, 0.58036, 0.58036)(0.34987, 0.29221, 0.23683)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-0.58036, -1.00000, -0.19975)(0.31892, 0.30513, 0.40993)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.30513, 0.51224, 0.58036)(0.28002, 0.58036, 0.46898)(-0.31892, -0.33375, -0.36759)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)
\n", "
" ], "text/plain": [ " 0 1 \\\n", "0 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "1 (0.58036, 0.58036, 0.58036) (0.46898, 0.15881, 0.46898) \n", "2 (0.51224, 0.33375, 0.04133) (0.26847, 0.05422, -0.10077) \n", "3 (-0.02218, -0.24694, -0.43642) (-0.51224, -0.43642, -0.43642) \n", "4 (-0.58036, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "5 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "6 (-1.00000, -1.00000, -1.00000) (-1.00000, -0.58036, -0.51224) \n", "7 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "8 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "9 (-1.00000, -0.33375, -0.17462) (-1.00000, -1.00000, -1.00000) \n", "10 (0.26847, 0.33375, 0.58036) (-1.00000, -1.00000, -1.00000) \n", "11 (0.58036, 0.58036, 0.58036) (-0.58036, -1.00000, -0.30513) \n", "12 (0.58036, 0.58036, 0.58036) (-0.33375, -0.28002, -0.22710) \n", "\n", " 2 3 \\\n", "0 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "1 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "2 (0.58036, 0.40993, 0.18279) (0.51224, 0.46898, 0.31892) \n", "3 (0.04776, -0.10077, -0.23683) (-0.00950, -0.02855, -0.29221) \n", "4 (-0.43642, -0.36759, -0.33375) (-0.38737, -1.00000, -1.00000) \n", "5 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "6 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "7 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "8 (-1.00000, -1.00000, -1.00000) (-1.00000, -0.46898, -0.34987) \n", "9 (-1.00000, -1.00000, -1.00000) (-0.19975, -0.16663, -0.06726) \n", "10 (-1.00000, -1.00000, -1.00000) (-0.03492, -0.00950, 0.02218) \n", "11 (-1.00000, -1.00000, -1.00000) (0.02855, 0.02855, 0.02855) \n", "12 (-1.00000, -1.00000, -1.00000) (0.02218, 0.02855, 0.02855) \n", "\n", " 4 5 \\\n", "0 (-1.00000, -1.00000, -0.46898) (0.36759, 0.43642, 0.51224) \n", "1 (-0.16663, 0.24694, 0.05422) (0.43642, 0.30513, 0.40993) \n", "2 (-0.13620, -0.29221, 0.06726) (0.25747, 0.13620, 0.06726) \n", "3 (0.51224, 0.46898, 0.46898) (-0.10767, -0.31892, -0.46898) \n", "4 (0.30513, -0.22710, -0.58036) (-1.00000, -1.00000, -1.00000) \n", "5 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "6 (-1.00000, -0.43642, -0.08049) (-0.38737, 0.05422, 0.58036) \n", "7 (0.11466, 0.16663, 0.12174) (0.58036, 0.58036, 0.58036) \n", "8 (-0.03492, -0.25747, -0.36759) (0.58036, 0.58036, 0.58036) \n", "9 (-0.46898, -0.40993, -0.24694) (0.58036, 0.58036, 0.58036) \n", "10 (0.08049, 0.22710, 0.51224) (0.58036, 0.58036, 0.51224) \n", "11 (0.58036, 0.58036, 0.58036) (0.51224, 0.43642, 0.40993) \n", "12 (0.58036, 0.58036, 0.58036) (0.34987, 0.29221, 0.23683) \n", "\n", " 6 7 \\\n", "0 (0.46898, 0.19116, -0.10077) (0.58036, 0.31892, -0.01583) \n", "1 (-0.33375, -1.00000, -1.00000) (-1.00000, -0.19116, -0.12174) \n", "2 (-1.00000, -1.00000, -0.58036) (0.43642, 0.46898, 0.46898) \n", "3 (-0.33375, -0.10767, -0.02855) (0.10767, -0.58036, -1.00000) \n", "4 (-0.00316, 0.02218, 0.04133) (-0.14360, 0.16663, 0.30513) \n", "5 (0.04133, 0.05422, -0.04776) (0.33375, 0.30513, 0.18279) \n", "6 (-0.22710, -0.46898, -0.51224) (-0.11466, -0.13620, -0.08049) \n", "7 (-0.43642, -0.31892, 0.00316) (-0.14360, 0.10767, 0.36759) \n", "8 (0.36759, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "9 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "10 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "11 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "12 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "\n", " 8 9 \\\n", "0 (0.58036, 0.58036, 0.58036) (0.06726, 0.06726, 0.51224) \n", "1 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "2 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "3 (0.58036, 0.58036, 0.58036) (0.51224, 0.29221, -0.19975) \n", "4 (0.08718, 0.28002, -0.19116) (-0.14360, -0.25747, -0.46898) \n", "5 (-0.51224, -0.31892, -0.58036) (-1.00000, -1.00000, -1.00000) \n", "6 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "7 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "8 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "9 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -0.51224) \n", "10 (-1.00000, -1.00000, -1.00000) (-0.10077, 0.09394, 0.03492) \n", "11 (-1.00000, -1.00000, -1.00000) (-0.19975, -0.31892, -0.46898) \n", "12 (-1.00000, -1.00000, -1.00000) (-0.58036, -1.00000, -0.19975) \n", "\n", " 10 11 \\\n", "0 (-0.14360, -0.13620, 0.15881) (0.22710, 0.20859, 0.01583) \n", "1 (0.58036, 0.58036, 0.58036) (-0.04776, -0.21769, -0.19975) \n", "2 (0.58036, 0.58036, 0.58036) (-0.34987, -0.40993, -0.58036) \n", "3 (0.51224, 0.17462, -0.30513) (-0.58036, -1.00000, -1.00000) \n", "4 (-0.02218, 0.20859, 0.15881) (-1.00000, -1.00000, -1.00000) \n", "5 (0.04776, -0.14360, -0.38737) (-1.00000, -1.00000, -1.00000) \n", "6 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "7 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "8 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, 0.12174) \n", "9 (-1.00000, -1.00000, -0.36759) (0.58036, 0.58036, 0.46898) \n", "10 (0.43642, 0.51224, 0.51224) (-1.00000, 0.12174, 0.58036) \n", "11 (0.40993, 0.34987, 0.29221) (0.58036, 0.58036, 0.58036) \n", "12 (0.31892, 0.30513, 0.40993) (0.58036, 0.58036, 0.58036) \n", "\n", " 12 13 \\\n", "0 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n", "1 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n", "2 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n", "3 (-1.00000, -1.00000, -1.00000) (0.58036, 0.51224, 0.51224) \n", "4 (-1.00000, -1.00000, -0.58036) (0.17462, 0.00950, -0.20859) \n", "5 (-0.21769, -0.16663, -0.13620) (-0.58036, -1.00000, -1.00000) \n", "6 (0.31892, 0.58036, 0.51224) (-1.00000, -1.00000, -1.00000) \n", "7 (0.51224, 0.51224, 0.43642) (-1.00000, -1.00000, -0.46898) \n", "8 (0.43642, 0.40993, 0.40993) (-0.51224, -0.43642, -0.26847) \n", "9 (0.38737, 0.43642, 0.46898) (-0.13620, 0.05422, 0.29221) \n", "10 (0.51224, 0.58036, 0.58036) (0.38737, 0.58036, 0.51224) \n", "11 (0.58036, 0.58036, 0.58036) (0.46898, 0.43642, 0.38737) \n", "12 (0.58036, 0.58036, 0.58036) (0.30513, 0.51224, 0.58036) \n", "\n", " 14 15 \\\n", "0 (0.58036, 0.58036, 0.58036) (-1.00000, -1.00000, -1.00000) \n", "1 (0.58036, 0.58036, 0.38737) (-1.00000, -0.58036, -0.26847) \n", "2 (0.03492, -0.40993, -1.00000) (-0.16663, 0.05422, 0.25747) \n", "3 (-1.00000, -1.00000, -1.00000) (0.46898, 0.58036, 0.58036) \n", "4 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n", "5 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n", "6 (-0.51224, -0.51224, -0.40993) (0.58036, 0.58036, 0.58036) \n", "7 (-0.40993, -0.38737, -0.38737) (0.58036, 0.58036, 0.58036) \n", "8 (-0.34987, -0.38737, -0.40993) (0.58036, 0.58036, 0.58036) \n", "9 (-0.43642, -0.46898, -0.43642) (0.58036, 0.58036, 0.13620) \n", "10 (-0.40993, -0.31892, -0.29221) (0.21769, 0.15114, 0.06072) \n", "11 (-0.29221, -0.12892, 0.06726) (-0.05422, -0.10767, -0.24694) \n", "12 (0.28002, 0.58036, 0.46898) (-0.31892, -0.33375, -0.36759) \n", "\n", " 16 17 \\\n", "0 (0.58036, 0.58036, 0.58036) (0.58036, 0.33375, 0.29221) \n", "1 (0.58036, 0.58036, 0.58036) (-0.14360, -0.16663, -0.10077) \n", "2 (0.03492, -0.31892, -1.00000) (-0.58036, -1.00000, -1.00000) \n", "3 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "4 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "5 (-1.00000, -1.00000, -1.00000) (-1.00000, -0.58036, -0.58036) \n", "6 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "7 (-1.00000, -1.00000, -1.00000) (-0.58036, -0.58036, -0.40993) \n", "8 (-0.58036, -0.46898, -0.40993) (-0.19975, 0.43642, 0.51224) \n", "9 (-0.30513, -0.08049, -0.04133) (0.40993, 0.58036, 0.58036) \n", "10 (-0.02855, -0.00316, 0.05422) (0.58036, 0.58036, 0.58036) \n", "11 (0.40993, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "12 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "\n", " 18 19 \n", "0 (0.58036, 0.58036, 0.58036) (0.51224, 0.51224, 0.58036) \n", "1 (0.51224, 0.38737, 0.28002) (0.58036, 0.58036, 0.58036) \n", "2 (-0.08049, 0.06072, -0.36759) (0.58036, 0.58036, 0.58036) \n", "3 (-0.58036, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n", "4 (-1.00000, -1.00000, -1.00000) (0.04776, -0.02855, -0.36759) \n", "5 (-1.00000, -1.00000, -1.00000) (-0.58036, -1.00000, -1.00000) \n", "6 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "7 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "8 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "9 (-1.00000, -0.46898, -0.16663) (-1.00000, -1.00000, -1.00000) \n", "10 (-0.18279, 0.51224, 0.58036) (-1.00000, -1.00000, -1.00000) \n", "11 (0.51224, 0.58036, 0.58036) (-1.00000, -1.00000, -1.00000) \n", "12 (0.58036, 0.58036, 0.58036) (-1.00000, -1.00000, -1.00000) " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wordDfs[('X', 1)]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Process ForkPoolWorker-42:\n", "Process ForkPoolWorker-43:\n", "Process ForkPoolWorker-38:\n", "Process ForkPoolWorker-44:\n", "Process ForkPoolWorker-46:\n", "Traceback (most recent call last):\n", "Process ForkPoolWorker-41:\n", "Process ForkPoolWorker-40:\n", "Process ForkPoolWorker-45:\n", "Process ForkPoolWorker-39:\n", "Process ForkPoolWorker-37:\n", "Process ForkPoolWorker-48:\n", "Process ForkPoolWorker-47:\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", "Traceback (most recent call last):\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", "Traceback (most recent call last):\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", "Traceback (most recent call last):\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", "Traceback (most recent call last):\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 252, in _bootstrap\n", " self.run()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", "KeyboardInterrupt\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/pool.py\", line 108, in worker\n", " task = get()\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 335, in get\n", " res = self._reader.recv_bytes()\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/connection.py\", line 216, in recv_bytes\n", " buf = self._recv_bytes(maxlength)\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", "KeyboardInterrupt\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", "KeyboardInterrupt\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", "KeyboardInterrupt\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/usr/lib64/python3.5/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", "KeyboardInterrupt\n", "KeyboardInterrupt\n", " File \"/usr/lib64/python3.5/multiprocessing/connection.py\", line 407, in _recv_bytes\n", " buf = self._recv(4)\n", "KeyboardInterrupt\n", "KeyboardInterrupt\n", "KeyboardInterrupt\n", "KeyboardInterrupt\n", " File \"/usr/lib64/python3.5/multiprocessing/connection.py\", line 379, in _recv\n", " chunk = read(handle, remaining)\n", "KeyboardInterrupt\n", "KeyboardInterrupt\n" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtf_idfs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlibword\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtfidf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwordDfs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnwords\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/_old/UNITO/lezioni/YearI/SecondSem/BDM/progetto/newsource/libword.py\u001b[0m in \u001b[0;36mtfidf\u001b[0;34m(odct, nwords)\u001b[0m\n\u001b[1;32m 155\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mlst\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 156\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcol\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miteritems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 157\u001b[0;31m \u001b[0mtfs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext_freq\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# associate every word with tf for each sensor\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 158\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0mtf_idfs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/_old/UNITO/lezioni/YearI/SecondSem/BDM/progetto/newsource/libword.py\u001b[0m in \u001b[0;36mtext_freq\u001b[0;34m(col)\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtext_freq\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[0md\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 101\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mword\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcnt\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miteritems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 102\u001b[0m \u001b[0mhashable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mword\u001b[0m \u001b[0;31m# hashable to be used as index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhashable\u001b[0m \u001b[0;32min\u001b[0m \u001b[0md\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.local/lib64/python3.5/site-packages/pandas/core/base.py\u001b[0m in \u001b[0;36mvalue_counts\u001b[0;34m(self, normalize, sort, ascending, bins, dropna)\u001b[0m\n\u001b[1;32m 869\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0malgorithms\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mvalue_counts\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 870\u001b[0m result = value_counts(self, sort=sort, ascending=ascending,\n\u001b[0;32m--> 871\u001b[0;31m normalize=normalize, bins=bins, dropna=dropna)\n\u001b[0m\u001b[1;32m 872\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 873\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.local/lib64/python3.5/site-packages/pandas/core/algorithms.py\u001b[0m in \u001b[0;36mvalue_counts\u001b[0;34m(values, sort, ascending, normalize, bins, dropna)\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 558\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 559\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mascending\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mascending\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 560\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnormalize\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.local/lib64/python3.5/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36msort_values\u001b[0;34m(self, axis, ascending, inplace, kind, na_position)\u001b[0m\n\u001b[1;32m 1918\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mna_position\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'last'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1919\u001b[0m \u001b[0mn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgood\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1920\u001b[0;31m \u001b[0msortedIdx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mgood\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0margsorted\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1921\u001b[0m \u001b[0msortedIdx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbad\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1922\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mna_position\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'first'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.local/lib64/python3.5/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m__array__\u001b[0;34m(self, dtype)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 563\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0m__array__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 564\u001b[0m \u001b[0;34m\"\"\" the array interface, return my values \"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 565\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "tf_idfs = libword.tfidf(wordDfs, nwords=3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "tf_idfs[('X', 1)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "g1 = odict((k,v) for k,v in wordDfs.items() if k[1] <= 31 )\n", "g2 = odict((k,v) for k,v in wordDfs.items() if k[1] > 31 and k[1] < 280 )\n", "g3 = odict((k,v) for k,v in wordDfs.items() if k[1] > 280 )\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "g1_tfidfs = libword.tfidf(g1, nwords=3)\n", "g2_tfidfs = libword.tfidf(g2, nwords=3)\n", "g3_tfidfs = libword.tfidf(g3, nwords=3)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "g1_tfidfs[('X', 1)]" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "import pickle\n", "with open('wordDfs.pickle', 'wb') as fp:\n", " pickle.dump(wordDfs, fp)\n", "with open('discretizeDfs.pickle', 'wb') as fp:\n", " pickle.dump(discretizeDfs, fp)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.5" } }, "nbformat": 4, "nbformat_minor": 2 }