{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import newsource.libword as libword\n", "import math\n", "from decimal import *\n", "import pandas as pd\n", "from collections import OrderedDict as odict\n", "\n", "datadir = \"./3classdata\"\n", "window = 3\n", "resolution = 50\n", "shift = 3" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import importlib\n", "importlib.reload(libword)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# read files from given directory\n", "def readDatadir(datadir):\n", " import glob\n", " frames = odict()\n", " for dfile in glob.glob(datadir+\"/*/*.csv\"):\n", " tmp = libword.read_csv(dfile)\n", " cord, numb = dfile.strip(datadir+'/').strip('.csv').split('/')\n", " frames[(cord, int(numb))] = tmp\n", " return frames" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "372" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "originalDfs = readDatadir(datadir)\n", "len(originalDfs)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "[('Z', 574),\n", " ('Z', 22),\n", " ('Z', 579),\n", " ('Z', 270),\n", " ('Z', 268),\n", " ('Z', 563),\n", " ('Z', 568),\n", " ('Z', 571),\n", " ('Z', 269),\n", " ('Z', 29),\n", " ('Z', 570),\n", " ('Z', 274),\n", " ('Z', 277),\n", " ('Z', 250),\n", " ('Z', 583),\n", " ('Z', 264),\n", " ('Z', 2),\n", " ('Z', 11),\n", " ('Z', 261),\n", " ('Z', 588),\n", " ('Z', 258),\n", " ('Z', 589),\n", " ('Z', 9),\n", " ('Z', 12),\n", " ('Z', 23),\n", " ('Z', 580),\n", " ('Z', 576),\n", " ('Z', 275),\n", " ('Z', 257),\n", " ('Z', 577),\n", " ('Z', 262),\n", " ('Z', 276),\n", " ('Z', 578),\n", " ('Z', 
20),\n", " ('Z', 562),\n", " ('Z', 263),\n", " ('Z', 273),\n", " ('Z', 256),\n", " ('Z', 30),\n", " ('Z', 259),\n", " ('Z', 585),\n", " ('Z', 31),\n", " ('Z', 279),\n", " ('Z', 25),\n", " ('Z', 565),\n", " ('Z', 575),\n", " ('Z', 251),\n", " ('Z', 278),\n", " ('Z', 560),\n", " ('Z', 584),\n", " ('Z', 14),\n", " ('Z', 27),\n", " ('Z', 561),\n", " ('Z', 265),\n", " ('Z', 24),\n", " ('Z', 19),\n", " ('Z', 10),\n", " ('Z', 569),\n", " ('Z', 587),\n", " ('Z', 559),\n", " ('Z', 8),\n", " ('Z', 271),\n", " ('Z', 272),\n", " ('Z', 1),\n", " ('Z', 567),\n", " ('Z', 582),\n", " ('Z', 17),\n", " ('Z', 260),\n", " ('Z', 5),\n", " ('Z', 16),\n", " ('Z', 3),\n", " ('Z', 267),\n", " ('Z', 255),\n", " ('Z', 26),\n", " ('Z', 249),\n", " ('Z', 581),\n", " ('Z', 573),\n", " ('Z', 28),\n", " ('Z', 6),\n", " ('Z', 13),\n", " ('Z', 572),\n", " ('Z', 564),\n", " ('Z', 252),\n", " ('Z', 586),\n", " ('Z', 18),\n", " ('Z', 15),\n", " ('Z', 253),\n", " ('Z', 4),\n", " ('Z', 21),\n", " ('Z', 7),\n", " ('Z', 254),\n", " ('Z', 266),\n", " ('Z', 566),\n", " ('X', 574),\n", " ('X', 22),\n", " ('X', 579),\n", " ('X', 270),\n", " ('X', 268),\n", " ('X', 563),\n", " ('X', 568),\n", " ('X', 571),\n", " ('X', 269),\n", " ('X', 29),\n", " ('X', 570),\n", " ('X', 274),\n", " ('X', 277),\n", " ('X', 250),\n", " ('X', 583),\n", " ('X', 264),\n", " ('X', 2),\n", " ('X', 11),\n", " ('X', 261),\n", " ('X', 588),\n", " ('X', 258),\n", " ('X', 589),\n", " ('X', 9),\n", " ('X', 12),\n", " ('X', 23),\n", " ('X', 580),\n", " ('X', 576),\n", " ('X', 275),\n", " ('X', 257),\n", " ('X', 577),\n", " ('X', 262),\n", " ('X', 276),\n", " ('X', 578),\n", " ('X', 20),\n", " ('X', 562),\n", " ('X', 263),\n", " ('X', 273),\n", " ('X', 256),\n", " ('X', 30),\n", " ('X', 259),\n", " ('X', 585),\n", " ('X', 31),\n", " ('X', 279),\n", " ('X', 25),\n", " ('X', 565),\n", " ('X', 575),\n", " ('X', 251),\n", " ('X', 278),\n", " ('X', 560),\n", " ('X', 584),\n", " ('X', 14),\n", " ('X', 27),\n", " ('X', 561),\n", " ('X', 265),\n", " 
('X', 24),\n", " ('X', 19),\n", " ('X', 10),\n", " ('X', 569),\n", " ('X', 587),\n", " ('X', 559),\n", " ('X', 8),\n", " ('X', 271),\n", " ('X', 272),\n", " ('X', 1),\n", " ('X', 567),\n", " ('X', 582),\n", " ('X', 17),\n", " ('X', 260),\n", " ('X', 5),\n", " ('X', 16),\n", " ('X', 3),\n", " ('X', 267),\n", " ('X', 255),\n", " ('X', 26),\n", " ('X', 249),\n", " ('X', 581),\n", " ('X', 573),\n", " ('X', 28),\n", " ('X', 6),\n", " ('X', 13),\n", " ('X', 572),\n", " ('X', 564),\n", " ('X', 252),\n", " ('X', 586),\n", " ('X', 18),\n", " ('X', 15),\n", " ('X', 253),\n", " ('X', 4),\n", " ('X', 21),\n", " ('X', 7),\n", " ('X', 254),\n", " ('X', 266),\n", " ('X', 566),\n", " ('Y', 574),\n", " ('Y', 22),\n", " ('Y', 579),\n", " ('Y', 270),\n", " ('Y', 268),\n", " ('Y', 563),\n", " ('Y', 568),\n", " ('Y', 571),\n", " ('Y', 269),\n", " ('Y', 29),\n", " ('Y', 570),\n", " ('Y', 274),\n", " ('Y', 277),\n", " ('Y', 250),\n", " ('Y', 583),\n", " ('Y', 264),\n", " ('Y', 2),\n", " ('Y', 11),\n", " ('Y', 261),\n", " ('Y', 588),\n", " ('Y', 258),\n", " ('Y', 589),\n", " ('Y', 9),\n", " ('Y', 12),\n", " ('Y', 23),\n", " ('Y', 580),\n", " ('Y', 576),\n", " ('Y', 275),\n", " ('Y', 257),\n", " ('Y', 577),\n", " ('Y', 262),\n", " ('Y', 276),\n", " ('Y', 578),\n", " ('Y', 20),\n", " ('Y', 562),\n", " ('Y', 263),\n", " ('Y', 273),\n", " ('Y', 256),\n", " ('Y', 30),\n", " ('Y', 259),\n", " ('Y', 585),\n", " ('Y', 31),\n", " ('Y', 279),\n", " ('Y', 25),\n", " ('Y', 565),\n", " ('Y', 575),\n", " ('Y', 251),\n", " ('Y', 278),\n", " ('Y', 560),\n", " ('Y', 584),\n", " ('Y', 14),\n", " ('Y', 27),\n", " ('Y', 561),\n", " ('Y', 265),\n", " ('Y', 24),\n", " ('Y', 19),\n", " ('Y', 10),\n", " ('Y', 569),\n", " ('Y', 587),\n", " ('Y', 559),\n", " ('Y', 8),\n", " ('Y', 271),\n", " ('Y', 272),\n", " ('Y', 1),\n", " ('Y', 567),\n", " ('Y', 582),\n", " ('Y', 17),\n", " ('Y', 260),\n", " ('Y', 5),\n", " ('Y', 16),\n", " ('Y', 3),\n", " ('Y', 267),\n", " ('Y', 255),\n", " ('Y', 26),\n", " ('Y', 249),\n", " 
('Y', 581),\n", " ('Y', 573),\n", " ('Y', 28),\n", " ('Y', 6),\n", " ('Y', 13),\n", " ('Y', 572),\n", " ('Y', 564),\n", " ('Y', 252),\n", " ('Y', 586),\n", " ('Y', 18),\n", " ('Y', 15),\n", " ('Y', 253),\n", " ('Y', 4),\n", " ('Y', 21),\n", " ('Y', 7),\n", " ('Y', 254),\n", " ('Y', 266),\n", " ('Y', 566),\n", " ('W', 574),\n", " ('W', 22),\n", " ('W', 579),\n", " ('W', 270),\n", " ('W', 268),\n", " ('W', 563),\n", " ('W', 568),\n", " ('W', 571),\n", " ('W', 269),\n", " ('W', 29),\n", " ('W', 570),\n", " ('W', 274),\n", " ('W', 277),\n", " ('W', 250),\n", " ('W', 583),\n", " ('W', 264),\n", " ('W', 2),\n", " ('W', 11),\n", " ('W', 261),\n", " ('W', 588),\n", " ('W', 258),\n", " ('W', 589),\n", " ('W', 9),\n", " ('W', 12),\n", " ('W', 23),\n", " ('W', 580),\n", " ('W', 576),\n", " ('W', 275),\n", " ('W', 257),\n", " ('W', 577),\n", " ('W', 262),\n", " ('W', 276),\n", " ('W', 578),\n", " ('W', 20),\n", " ('W', 562),\n", " ('W', 263),\n", " ('W', 273),\n", " ('W', 256),\n", " ('W', 30),\n", " ('W', 259),\n", " ('W', 585),\n", " ('W', 31),\n", " ('W', 279),\n", " ('W', 25),\n", " ('W', 565),\n", " ('W', 575),\n", " ('W', 251),\n", " ('W', 278),\n", " ('W', 560),\n", " ('W', 584),\n", " ('W', 14),\n", " ('W', 27),\n", " ('W', 561),\n", " ('W', 265),\n", " ('W', 24),\n", " ('W', 19),\n", " ('W', 10),\n", " ('W', 569),\n", " ('W', 587),\n", " ('W', 559),\n", " ('W', 8),\n", " ('W', 271),\n", " ('W', 272),\n", " ('W', 1),\n", " ('W', 567),\n", " ('W', 582),\n", " ('W', 17),\n", " ('W', 260),\n", " ('W', 5),\n", " ('W', 16),\n", " ('W', 3),\n", " ('W', 267),\n", " ('W', 255),\n", " ('W', 26),\n", " ('W', 249),\n", " ('W', 581),\n", " ('W', 573),\n", " ('W', 28),\n", " ('W', 6),\n", " ('W', 13),\n", " ('W', 572),\n", " ('W', 564),\n", " ('W', 252),\n", " ('W', 586),\n", " ('W', 18),\n", " ('W', 15),\n", " ('W', 253),\n", " ('W', 4),\n", " ('W', 21),\n", " ('W', 7),\n", " ('W', 254),\n", " ('W', 266),\n", " ('W', 566)]" ] }, "execution_count": 5, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "list(originalDfs.keys())" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# normalize between [-1,1]\n", "normalizeDfs = odict()\n", "for k, df in originalDfs.items():\n", " normalizeDfs[k] = libword.normalize(df)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# discretize on a gaussian interval of 2*resolution steps\n", "discretizeDfs = dict()\n", "interval = libword.gaussian_interval(resolution)\n", "discretizeDfs = libword.parallel(target=libword.discretize, iterable=normalizeDfs, \n", " interval=interval, how=\"left\", n=12)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [], "source": [ "#print(interval)\n", "#discretizeDfs[0]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# extract words from dataset (from window,shift given)\n", "wordDfs = libword.parallel(target=libword.rolling_window, iterable=discretizeDfs,\n", " w=window, s=shift, n=12)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# TF: on single columns (monovariate series) of each dataframe\n", "# associate each series with its 3 most frequent words (according to TF)\n", "tf_Dfs = libword.parallel(target=libword.extract_words, iterable=wordDfs, \n", " how=libword.text_freq, n=12)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "#tf_Dfs[0]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012345678910111213141516171819
0(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -0.46898)(0.36759, 0.43642, 0.51224)(0.46898, 0.19116, -0.10077)(0.58036, 0.31892, -0.01583)(0.58036, 0.58036, 0.58036)(0.06726, 0.06726, 0.51224)(-0.14360, -0.13620, 0.15881)(0.22710, 0.20859, 0.01583)(-1.00000, -1.00000, -1.00000)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(0.58036, 0.58036, 0.58036)(0.58036, 0.33375, 0.29221)(0.58036, 0.58036, 0.58036)(0.51224, 0.51224, 0.58036)
1(0.58036, 0.58036, 0.58036)(0.46898, 0.15881, 0.46898)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-0.16663, 0.24694, 0.05422)(0.43642, 0.30513, 0.40993)(-0.33375, -1.00000, -1.00000)(-1.00000, -0.19116, -0.12174)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-0.04776, -0.21769, -0.19975)(-1.00000, -1.00000, -1.00000)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.38737)(-1.00000, -0.58036, -0.26847)(0.58036, 0.58036, 0.58036)(-0.14360, -0.16663, -0.10077)(0.51224, 0.38737, 0.28002)(0.58036, 0.58036, 0.58036)
2(0.51224, 0.33375, 0.04133)(0.26847, 0.05422, -0.10077)(0.58036, 0.40993, 0.18279)(0.51224, 0.46898, 0.31892)(-0.13620, -0.29221, 0.06726)(0.25747, 0.13620, 0.06726)(-1.00000, -1.00000, -0.58036)(0.43642, 0.46898, 0.46898)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-0.34987, -0.40993, -0.58036)(-1.00000, -1.00000, -1.00000)(0.58036, 0.58036, 0.58036)(0.03492, -0.40993, -1.00000)(-0.16663, 0.05422, 0.25747)(0.03492, -0.31892, -1.00000)(-0.58036, -1.00000, -1.00000)(-0.08049, 0.06072, -0.36759)(0.58036, 0.58036, 0.58036)
3(-0.02218, -0.24694, -0.43642)(-0.51224, -0.43642, -0.43642)(0.04776, -0.10077, -0.23683)(-0.00950, -0.02855, -0.29221)(0.51224, 0.46898, 0.46898)(-0.10767, -0.31892, -0.46898)(-0.33375, -0.10767, -0.02855)(0.10767, -0.58036, -1.00000)(0.58036, 0.58036, 0.58036)(0.51224, 0.29221, -0.19975)(0.51224, 0.17462, -0.30513)(-0.58036, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(0.58036, 0.51224, 0.51224)(-1.00000, -1.00000, -1.00000)(0.46898, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-0.58036, -1.00000, -1.00000)(0.58036, 0.58036, 0.58036)
4(-0.58036, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-0.43642, -0.36759, -0.33375)(-0.38737, -1.00000, -1.00000)(0.30513, -0.22710, -0.58036)(-1.00000, -1.00000, -1.00000)(-0.00316, 0.02218, 0.04133)(-0.14360, 0.16663, 0.30513)(0.08718, 0.28002, -0.19116)(-0.14360, -0.25747, -0.46898)(-0.02218, 0.20859, 0.15881)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -0.58036)(0.17462, 0.00950, -0.20859)(-1.00000, -1.00000, -1.00000)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(0.04776, -0.02855, -0.36759)
5(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(0.04133, 0.05422, -0.04776)(0.33375, 0.30513, 0.18279)(-0.51224, -0.31892, -0.58036)(-1.00000, -1.00000, -1.00000)(0.04776, -0.14360, -0.38737)(-1.00000, -1.00000, -1.00000)(-0.21769, -0.16663, -0.13620)(-0.58036, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-1.00000, -0.58036, -0.58036)(-1.00000, -1.00000, -1.00000)(-0.58036, -1.00000, -1.00000)
6(-1.00000, -1.00000, -1.00000)(-1.00000, -0.58036, -0.51224)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -0.43642, -0.08049)(-0.38737, 0.05422, 0.58036)(-0.22710, -0.46898, -0.51224)(-0.11466, -0.13620, -0.08049)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(0.31892, 0.58036, 0.51224)(-1.00000, -1.00000, -1.00000)(-0.51224, -0.51224, -0.40993)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)
7(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(0.11466, 0.16663, 0.12174)(0.58036, 0.58036, 0.58036)(-0.43642, -0.31892, 0.00316)(-0.14360, 0.10767, 0.36759)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(0.51224, 0.51224, 0.43642)(-1.00000, -1.00000, -0.46898)(-0.40993, -0.38737, -0.38737)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-0.58036, -0.58036, -0.40993)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)
8(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -0.46898, -0.34987)(-0.03492, -0.25747, -0.36759)(0.58036, 0.58036, 0.58036)(0.36759, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, 0.12174)(0.43642, 0.40993, 0.40993)(-0.51224, -0.43642, -0.26847)(-0.34987, -0.38737, -0.40993)(0.58036, 0.58036, 0.58036)(-0.58036, -0.46898, -0.40993)(-0.19975, 0.43642, 0.51224)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)
9(-1.00000, -0.33375, -0.17462)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-0.19975, -0.16663, -0.06726)(-0.46898, -0.40993, -0.24694)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -0.51224)(-1.00000, -1.00000, -0.36759)(0.58036, 0.58036, 0.46898)(0.38737, 0.43642, 0.46898)(-0.13620, 0.05422, 0.29221)(-0.43642, -0.46898, -0.43642)(0.58036, 0.58036, 0.13620)(-0.30513, -0.08049, -0.04133)(0.40993, 0.58036, 0.58036)(-1.00000, -0.46898, -0.16663)(-1.00000, -1.00000, -1.00000)
10(0.26847, 0.33375, 0.58036)(-1.00000, -1.00000, -1.00000)(-1.00000, -1.00000, -1.00000)(-0.03492, -0.00950, 0.02218)(0.08049, 0.22710, 0.51224)(0.58036, 0.58036, 0.51224)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-0.10077, 0.09394, 0.03492)(0.43642, 0.51224, 0.51224)(-1.00000, 0.12174, 0.58036)(0.51224, 0.58036, 0.58036)(0.38737, 0.58036, 0.51224)(-0.40993, -0.31892, -0.29221)(0.21769, 0.15114, 0.06072)(-0.02855, -0.00316, 0.05422)(0.58036, 0.58036, 0.58036)(-0.18279, 0.51224, 0.58036)(-1.00000, -1.00000, -1.00000)
11(0.58036, 0.58036, 0.58036)(-0.58036, -1.00000, -0.30513)(-1.00000, -1.00000, -1.00000)(0.02855, 0.02855, 0.02855)(0.58036, 0.58036, 0.58036)(0.51224, 0.43642, 0.40993)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-0.19975, -0.31892, -0.46898)(0.40993, 0.34987, 0.29221)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.46898, 0.43642, 0.38737)(-0.29221, -0.12892, 0.06726)(-0.05422, -0.10767, -0.24694)(0.40993, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.51224, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)
12(0.58036, 0.58036, 0.58036)(-0.33375, -0.28002, -0.22710)(-1.00000, -1.00000, -1.00000)(0.02218, 0.02855, 0.02855)(0.58036, 0.58036, 0.58036)(0.34987, 0.29221, 0.23683)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)(-0.58036, -1.00000, -0.19975)(0.31892, 0.30513, 0.40993)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.30513, 0.51224, 0.58036)(0.28002, 0.58036, 0.46898)(-0.31892, -0.33375, -0.36759)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(0.58036, 0.58036, 0.58036)(-1.00000, -1.00000, -1.00000)
\n", "
" ], "text/plain": [ " 0 1 \\\n", "0 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "1 (0.58036, 0.58036, 0.58036) (0.46898, 0.15881, 0.46898) \n", "2 (0.51224, 0.33375, 0.04133) (0.26847, 0.05422, -0.10077) \n", "3 (-0.02218, -0.24694, -0.43642) (-0.51224, -0.43642, -0.43642) \n", "4 (-0.58036, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "5 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "6 (-1.00000, -1.00000, -1.00000) (-1.00000, -0.58036, -0.51224) \n", "7 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "8 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "9 (-1.00000, -0.33375, -0.17462) (-1.00000, -1.00000, -1.00000) \n", "10 (0.26847, 0.33375, 0.58036) (-1.00000, -1.00000, -1.00000) \n", "11 (0.58036, 0.58036, 0.58036) (-0.58036, -1.00000, -0.30513) \n", "12 (0.58036, 0.58036, 0.58036) (-0.33375, -0.28002, -0.22710) \n", "\n", " 2 3 \\\n", "0 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "1 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "2 (0.58036, 0.40993, 0.18279) (0.51224, 0.46898, 0.31892) \n", "3 (0.04776, -0.10077, -0.23683) (-0.00950, -0.02855, -0.29221) \n", "4 (-0.43642, -0.36759, -0.33375) (-0.38737, -1.00000, -1.00000) \n", "5 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "6 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "7 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "8 (-1.00000, -1.00000, -1.00000) (-1.00000, -0.46898, -0.34987) \n", "9 (-1.00000, -1.00000, -1.00000) (-0.19975, -0.16663, -0.06726) \n", "10 (-1.00000, -1.00000, -1.00000) (-0.03492, -0.00950, 0.02218) \n", "11 (-1.00000, -1.00000, -1.00000) (0.02855, 0.02855, 0.02855) \n", "12 (-1.00000, -1.00000, -1.00000) (0.02218, 0.02855, 0.02855) \n", "\n", " 4 5 \\\n", "0 (-1.00000, -1.00000, -0.46898) (0.36759, 0.43642, 0.51224) \n", "1 (-0.16663, 0.24694, 0.05422) (0.43642, 0.30513, 0.40993) \n", "2 (-0.13620, 
-0.29221, 0.06726) (0.25747, 0.13620, 0.06726) \n", "3 (0.51224, 0.46898, 0.46898) (-0.10767, -0.31892, -0.46898) \n", "4 (0.30513, -0.22710, -0.58036) (-1.00000, -1.00000, -1.00000) \n", "5 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "6 (-1.00000, -0.43642, -0.08049) (-0.38737, 0.05422, 0.58036) \n", "7 (0.11466, 0.16663, 0.12174) (0.58036, 0.58036, 0.58036) \n", "8 (-0.03492, -0.25747, -0.36759) (0.58036, 0.58036, 0.58036) \n", "9 (-0.46898, -0.40993, -0.24694) (0.58036, 0.58036, 0.58036) \n", "10 (0.08049, 0.22710, 0.51224) (0.58036, 0.58036, 0.51224) \n", "11 (0.58036, 0.58036, 0.58036) (0.51224, 0.43642, 0.40993) \n", "12 (0.58036, 0.58036, 0.58036) (0.34987, 0.29221, 0.23683) \n", "\n", " 6 7 \\\n", "0 (0.46898, 0.19116, -0.10077) (0.58036, 0.31892, -0.01583) \n", "1 (-0.33375, -1.00000, -1.00000) (-1.00000, -0.19116, -0.12174) \n", "2 (-1.00000, -1.00000, -0.58036) (0.43642, 0.46898, 0.46898) \n", "3 (-0.33375, -0.10767, -0.02855) (0.10767, -0.58036, -1.00000) \n", "4 (-0.00316, 0.02218, 0.04133) (-0.14360, 0.16663, 0.30513) \n", "5 (0.04133, 0.05422, -0.04776) (0.33375, 0.30513, 0.18279) \n", "6 (-0.22710, -0.46898, -0.51224) (-0.11466, -0.13620, -0.08049) \n", "7 (-0.43642, -0.31892, 0.00316) (-0.14360, 0.10767, 0.36759) \n", "8 (0.36759, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "9 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "10 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "11 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "12 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "\n", " 8 9 \\\n", "0 (0.58036, 0.58036, 0.58036) (0.06726, 0.06726, 0.51224) \n", "1 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "2 (0.58036, 0.58036, 0.58036) (0.58036, 0.58036, 0.58036) \n", "3 (0.58036, 0.58036, 0.58036) (0.51224, 0.29221, -0.19975) \n", "4 (0.08718, 0.28002, -0.19116) (-0.14360, -0.25747, -0.46898) \n", "5 (-0.51224, -0.31892, -0.58036) (-1.00000, -1.00000, 
-1.00000) \n", "6 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "7 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "8 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "9 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -0.51224) \n", "10 (-1.00000, -1.00000, -1.00000) (-0.10077, 0.09394, 0.03492) \n", "11 (-1.00000, -1.00000, -1.00000) (-0.19975, -0.31892, -0.46898) \n", "12 (-1.00000, -1.00000, -1.00000) (-0.58036, -1.00000, -0.19975) \n", "\n", " 10 11 \\\n", "0 (-0.14360, -0.13620, 0.15881) (0.22710, 0.20859, 0.01583) \n", "1 (0.58036, 0.58036, 0.58036) (-0.04776, -0.21769, -0.19975) \n", "2 (0.58036, 0.58036, 0.58036) (-0.34987, -0.40993, -0.58036) \n", "3 (0.51224, 0.17462, -0.30513) (-0.58036, -1.00000, -1.00000) \n", "4 (-0.02218, 0.20859, 0.15881) (-1.00000, -1.00000, -1.00000) \n", "5 (0.04776, -0.14360, -0.38737) (-1.00000, -1.00000, -1.00000) \n", "6 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "7 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "8 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, 0.12174) \n", "9 (-1.00000, -1.00000, -0.36759) (0.58036, 0.58036, 0.46898) \n", "10 (0.43642, 0.51224, 0.51224) (-1.00000, 0.12174, 0.58036) \n", "11 (0.40993, 0.34987, 0.29221) (0.58036, 0.58036, 0.58036) \n", "12 (0.31892, 0.30513, 0.40993) (0.58036, 0.58036, 0.58036) \n", "\n", " 12 13 \\\n", "0 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n", "1 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n", "2 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n", "3 (-1.00000, -1.00000, -1.00000) (0.58036, 0.51224, 0.51224) \n", "4 (-1.00000, -1.00000, -0.58036) (0.17462, 0.00950, -0.20859) \n", "5 (-0.21769, -0.16663, -0.13620) (-0.58036, -1.00000, -1.00000) \n", "6 (0.31892, 0.58036, 0.51224) (-1.00000, -1.00000, -1.00000) \n", "7 (0.51224, 0.51224, 0.43642) (-1.00000, -1.00000, -0.46898) \n", "8 (0.43642, 0.40993, 0.40993) 
(-0.51224, -0.43642, -0.26847) \n", "9 (0.38737, 0.43642, 0.46898) (-0.13620, 0.05422, 0.29221) \n", "10 (0.51224, 0.58036, 0.58036) (0.38737, 0.58036, 0.51224) \n", "11 (0.58036, 0.58036, 0.58036) (0.46898, 0.43642, 0.38737) \n", "12 (0.58036, 0.58036, 0.58036) (0.30513, 0.51224, 0.58036) \n", "\n", " 14 15 \\\n", "0 (0.58036, 0.58036, 0.58036) (-1.00000, -1.00000, -1.00000) \n", "1 (0.58036, 0.58036, 0.38737) (-1.00000, -0.58036, -0.26847) \n", "2 (0.03492, -0.40993, -1.00000) (-0.16663, 0.05422, 0.25747) \n", "3 (-1.00000, -1.00000, -1.00000) (0.46898, 0.58036, 0.58036) \n", "4 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n", "5 (-1.00000, -1.00000, -1.00000) (0.58036, 0.58036, 0.58036) \n", "6 (-0.51224, -0.51224, -0.40993) (0.58036, 0.58036, 0.58036) \n", "7 (-0.40993, -0.38737, -0.38737) (0.58036, 0.58036, 0.58036) \n", "8 (-0.34987, -0.38737, -0.40993) (0.58036, 0.58036, 0.58036) \n", "9 (-0.43642, -0.46898, -0.43642) (0.58036, 0.58036, 0.13620) \n", "10 (-0.40993, -0.31892, -0.29221) (0.21769, 0.15114, 0.06072) \n", "11 (-0.29221, -0.12892, 0.06726) (-0.05422, -0.10767, -0.24694) \n", "12 (0.28002, 0.58036, 0.46898) (-0.31892, -0.33375, -0.36759) \n", "\n", " 16 17 \\\n", "0 (0.58036, 0.58036, 0.58036) (0.58036, 0.33375, 0.29221) \n", "1 (0.58036, 0.58036, 0.58036) (-0.14360, -0.16663, -0.10077) \n", "2 (0.03492, -0.31892, -1.00000) (-0.58036, -1.00000, -1.00000) \n", "3 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "4 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "5 (-1.00000, -1.00000, -1.00000) (-1.00000, -0.58036, -0.58036) \n", "6 (-1.00000, -1.00000, -1.00000) (-1.00000, -1.00000, -1.00000) \n", "7 (-1.00000, -1.00000, -1.00000) (-0.58036, -0.58036, -0.40993) \n", "8 (-0.58036, -0.46898, -0.40993) (-0.19975, 0.43642, 0.51224) \n", "9 (-0.30513, -0.08049, -0.04133) (0.40993, 0.58036, 0.58036) \n", "10 (-0.02855, -0.00316, 0.05422) (0.58036, 0.58036, 0.58036) \n", "11 (0.40993, 0.58036, 0.58036) 
# %% Display the table of extracted words for the series keyed ('X', 1)
wordDfs["X", 1]

# %% TF-IDF word selection over the whole collection of word tables.
# nwords=3 is forwarded to libword.tfidf unchanged.
tf_idfs = libword.tfidf(
    wordDfs,
    nwords=3,
)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012345678910111213141516171819
0(-1.00000, -0.33375, -0.17462)(-0.58036, -1.00000, -0.30513)(-0.43642, -0.36759, -0.33375)(-0.19975, -0.16663, -0.06726)(-0.13620, -0.29221, 0.06726)(0.25747, 0.13620, 0.06726)(-0.33375, -0.10767, -0.02855)(0.33375, 0.30513, 0.18279)(-1.00000, -1.00000, -1.00000)(0.51224, 0.29221, -0.19975)(0.51224, 0.17462, -0.30513)(-0.04776, -0.21769, -0.19975)(0.38737, 0.43642, 0.46898)(-0.51224, -0.43642, -0.26847)(-0.40993, -0.31892, -0.29221)(-0.05422, -0.10767, -0.24694)(-0.02855, -0.00316, 0.05422)(0.58036, 0.33375, 0.29221)(-1.00000, -0.46898, -0.16663)(-0.58036, -1.00000, -1.00000)
1(-0.02218, -0.24694, -0.43642)(0.46898, 0.15881, 0.46898)(0.58036, 0.40993, 0.18279)(-0.03492, -0.00950, 0.02218)(-0.03492, -0.25747, -0.36759)(-0.38737, 0.05422, 0.58036)(0.04133, 0.05422, -0.04776)(-0.14360, 0.10767, 0.36759)(0.08718, 0.28002, -0.19116)(-0.10077, 0.09394, 0.03492)(0.04776, -0.14360, -0.38737)(-1.00000, 0.12174, 0.58036)(0.31892, 0.58036, 0.51224)(0.17462, 0.00950, -0.20859)(0.03492, -0.40993, -1.00000)(0.21769, 0.15114, 0.06072)(0.03492, -0.31892, -1.00000)(-0.14360, -0.16663, -0.10077)(-0.18279, 0.51224, 0.58036)(0.51224, 0.51224, 0.58036)
2(0.51224, 0.33375, 0.04133)(0.26847, 0.05422, -0.10077)(0.04776, -0.10077, -0.23683)(-0.00950, -0.02855, -0.29221)(0.11466, 0.16663, 0.12174)(0.43642, 0.30513, 0.40993)(-0.00316, 0.02218, 0.04133)(-1.00000, -0.19116, -0.12174)(-0.51224, -0.31892, -0.58036)(-0.58036, -1.00000, -0.19975)(-0.14360, -0.13620, 0.15881)(0.22710, 0.20859, 0.01583)(-0.21769, -0.16663, -0.13620)(-0.13620, 0.05422, 0.29221)(0.28002, 0.58036, 0.46898)(-0.16663, 0.05422, 0.25747)(-0.30513, -0.08049, -0.04133)(-0.19975, 0.43642, 0.51224)(-0.08049, 0.06072, -0.36759)(0.04776, -0.02855, -0.36759)
\n", "
" ], "text/plain": [ " 0 1 \\\n", "0 (-1.00000, -0.33375, -0.17462) (-0.58036, -1.00000, -0.30513) \n", "1 (-0.02218, -0.24694, -0.43642) (0.46898, 0.15881, 0.46898) \n", "2 (0.51224, 0.33375, 0.04133) (0.26847, 0.05422, -0.10077) \n", "\n", " 2 3 \\\n", "0 (-0.43642, -0.36759, -0.33375) (-0.19975, -0.16663, -0.06726) \n", "1 (0.58036, 0.40993, 0.18279) (-0.03492, -0.00950, 0.02218) \n", "2 (0.04776, -0.10077, -0.23683) (-0.00950, -0.02855, -0.29221) \n", "\n", " 4 5 \\\n", "0 (-0.13620, -0.29221, 0.06726) (0.25747, 0.13620, 0.06726) \n", "1 (-0.03492, -0.25747, -0.36759) (-0.38737, 0.05422, 0.58036) \n", "2 (0.11466, 0.16663, 0.12174) (0.43642, 0.30513, 0.40993) \n", "\n", " 6 7 \\\n", "0 (-0.33375, -0.10767, -0.02855) (0.33375, 0.30513, 0.18279) \n", "1 (0.04133, 0.05422, -0.04776) (-0.14360, 0.10767, 0.36759) \n", "2 (-0.00316, 0.02218, 0.04133) (-1.00000, -0.19116, -0.12174) \n", "\n", " 8 9 \\\n", "0 (-1.00000, -1.00000, -1.00000) (0.51224, 0.29221, -0.19975) \n", "1 (0.08718, 0.28002, -0.19116) (-0.10077, 0.09394, 0.03492) \n", "2 (-0.51224, -0.31892, -0.58036) (-0.58036, -1.00000, -0.19975) \n", "\n", " 10 11 \\\n", "0 (0.51224, 0.17462, -0.30513) (-0.04776, -0.21769, -0.19975) \n", "1 (0.04776, -0.14360, -0.38737) (-1.00000, 0.12174, 0.58036) \n", "2 (-0.14360, -0.13620, 0.15881) (0.22710, 0.20859, 0.01583) \n", "\n", " 12 13 \\\n", "0 (0.38737, 0.43642, 0.46898) (-0.51224, -0.43642, -0.26847) \n", "1 (0.31892, 0.58036, 0.51224) (0.17462, 0.00950, -0.20859) \n", "2 (-0.21769, -0.16663, -0.13620) (-0.13620, 0.05422, 0.29221) \n", "\n", " 14 15 \\\n", "0 (-0.40993, -0.31892, -0.29221) (-0.05422, -0.10767, -0.24694) \n", "1 (0.03492, -0.40993, -1.00000) (0.21769, 0.15114, 0.06072) \n", "2 (0.28002, 0.58036, 0.46898) (-0.16663, 0.05422, 0.25747) \n", "\n", " 16 17 \\\n", "0 (-0.02855, -0.00316, 0.05422) (0.58036, 0.33375, 0.29221) \n", "1 (0.03492, -0.31892, -1.00000) (-0.14360, -0.16663, -0.10077) \n", "2 (-0.30513, -0.08049, -0.04133) (-0.19975, 0.43642, 
0.51224) \n", "\n", " 18 19 \n", "0 (-1.00000, -0.46898, -0.16663) (-0.58036, -1.00000, -1.00000) \n", "1 (-0.18279, 0.51224, 0.58036) (0.51224, 0.51224, 0.58036) \n", "2 (-0.08049, 0.06072, -0.36759) (0.04776, -0.02855, -0.36759) " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tf_idfs[('X', 1)]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Split wordDfs into three groups by the number stored in the second key element.\n", "# g3 uses >= 280 so that a key numbered exactly 280 is not dropped between g2 and g3\n", "# (the previous bounds were `< 280` and `> 280`, leaving 280 in no group).\n", "g1 = odict((k, v) for k, v in wordDfs.items() if k[1] <= 31)\n", "g2 = odict((k, v) for k, v in wordDfs.items() if 31 < k[1] < 280)\n", "g3 = odict((k, v) for k, v in wordDfs.items() if k[1] >= 280)\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "g1_tfidfs = libword.tfidf(g1, nwords=3)\n", "g2_tfidfs = libword.tfidf(g2, nwords=3)\n", "g3_tfidfs = libword.tfidf(g3, nwords=3)\n" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012345678910111213141516171819
0(0.26847, 0.33375, 0.58036)(0.46898, 0.15881, 0.46898)(-0.43642, -0.36759, -0.33375)(0.02855, 0.02855, 0.02855)(-0.03492, -0.25747, -0.36759)(-0.10767, -0.31892, -0.46898)(-0.33375, -0.10767, -0.02855)(0.33375, 0.30513, 0.18279)(-1.00000, -1.00000, -1.00000)(-0.10077, 0.09394, 0.03492)(0.51224, 0.17462, -0.30513)(-0.34987, -0.40993, -0.58036)(0.31892, 0.58036, 0.51224)(-0.51224, -0.43642, -0.26847)(-0.29221, -0.12892, 0.06726)(-0.05422, -0.10767, -0.24694)(-0.02855, -0.00316, 0.05422)(0.58036, 0.33375, 0.29221)(-1.00000, -0.46898, -0.16663)(-0.58036, -1.00000, -1.00000)
1(-0.02218, -0.24694, -0.43642)(0.26847, 0.05422, -0.10077)(0.04776, -0.10077, -0.23683)(-0.03492, -0.00950, 0.02218)(0.11466, 0.16663, 0.12174)(-0.38737, 0.05422, 0.58036)(0.04133, 0.05422, -0.04776)(-0.14360, 0.10767, 0.36759)(0.08718, 0.28002, -0.19116)(-0.58036, -1.00000, -0.19975)(0.04776, -0.14360, -0.38737)(0.22710, 0.20859, 0.01583)(-0.21769, -0.16663, -0.13620)(0.17462, 0.00950, -0.20859)(0.03492, -0.40993, -1.00000)(0.21769, 0.15114, 0.06072)(0.03492, -0.31892, -1.00000)(-0.14360, -0.16663, -0.10077)(-0.18279, 0.51224, 0.58036)(0.51224, 0.51224, 0.58036)
2(0.51224, 0.33375, 0.04133)(-0.58036, -1.00000, -0.30513)(0.58036, 0.40993, 0.18279)(-0.00950, -0.02855, -0.29221)(-1.00000, -0.43642, -0.08049)(0.43642, 0.30513, 0.40993)(-0.00316, 0.02218, 0.04133)(-1.00000, -0.19116, -0.12174)(-0.51224, -0.31892, -0.58036)(-0.19975, -0.31892, -0.46898)(-0.14360, -0.13620, 0.15881)(-0.04776, -0.21769, -0.19975)(0.38737, 0.43642, 0.46898)(-0.13620, 0.05422, 0.29221)(0.28002, 0.58036, 0.46898)(-0.16663, 0.05422, 0.25747)(-0.30513, -0.08049, -0.04133)(-0.19975, 0.43642, 0.51224)(-0.08049, 0.06072, -0.36759)(0.04776, -0.02855, -0.36759)
\n", "
" ], "text/plain": [ " 0 1 \\\n", "0 (0.26847, 0.33375, 0.58036) (0.46898, 0.15881, 0.46898) \n", "1 (-0.02218, -0.24694, -0.43642) (0.26847, 0.05422, -0.10077) \n", "2 (0.51224, 0.33375, 0.04133) (-0.58036, -1.00000, -0.30513) \n", "\n", " 2 3 \\\n", "0 (-0.43642, -0.36759, -0.33375) (0.02855, 0.02855, 0.02855) \n", "1 (0.04776, -0.10077, -0.23683) (-0.03492, -0.00950, 0.02218) \n", "2 (0.58036, 0.40993, 0.18279) (-0.00950, -0.02855, -0.29221) \n", "\n", " 4 5 \\\n", "0 (-0.03492, -0.25747, -0.36759) (-0.10767, -0.31892, -0.46898) \n", "1 (0.11466, 0.16663, 0.12174) (-0.38737, 0.05422, 0.58036) \n", "2 (-1.00000, -0.43642, -0.08049) (0.43642, 0.30513, 0.40993) \n", "\n", " 6 7 \\\n", "0 (-0.33375, -0.10767, -0.02855) (0.33375, 0.30513, 0.18279) \n", "1 (0.04133, 0.05422, -0.04776) (-0.14360, 0.10767, 0.36759) \n", "2 (-0.00316, 0.02218, 0.04133) (-1.00000, -0.19116, -0.12174) \n", "\n", " 8 9 \\\n", "0 (-1.00000, -1.00000, -1.00000) (-0.10077, 0.09394, 0.03492) \n", "1 (0.08718, 0.28002, -0.19116) (-0.58036, -1.00000, -0.19975) \n", "2 (-0.51224, -0.31892, -0.58036) (-0.19975, -0.31892, -0.46898) \n", "\n", " 10 11 \\\n", "0 (0.51224, 0.17462, -0.30513) (-0.34987, -0.40993, -0.58036) \n", "1 (0.04776, -0.14360, -0.38737) (0.22710, 0.20859, 0.01583) \n", "2 (-0.14360, -0.13620, 0.15881) (-0.04776, -0.21769, -0.19975) \n", "\n", " 12 13 \\\n", "0 (0.31892, 0.58036, 0.51224) (-0.51224, -0.43642, -0.26847) \n", "1 (-0.21769, -0.16663, -0.13620) (0.17462, 0.00950, -0.20859) \n", "2 (0.38737, 0.43642, 0.46898) (-0.13620, 0.05422, 0.29221) \n", "\n", " 14 15 \\\n", "0 (-0.29221, -0.12892, 0.06726) (-0.05422, -0.10767, -0.24694) \n", "1 (0.03492, -0.40993, -1.00000) (0.21769, 0.15114, 0.06072) \n", "2 (0.28002, 0.58036, 0.46898) (-0.16663, 0.05422, 0.25747) \n", "\n", " 16 17 \\\n", "0 (-0.02855, -0.00316, 0.05422) (0.58036, 0.33375, 0.29221) \n", "1 (0.03492, -0.31892, -1.00000) (-0.14360, -0.16663, -0.10077) \n", "2 (-0.30513, -0.08049, -0.04133) (-0.19975, 0.43642, 
0.51224) \n", "\n", " 18 19 \n", "0 (-1.00000, -0.46898, -0.16663) (-0.58036, -1.00000, -1.00000) \n", "1 (-0.18279, 0.51224, 0.58036) (0.51224, 0.51224, 0.58036) \n", "2 (-0.08049, 0.06072, -0.36759) (0.04776, -0.02855, -0.36759) " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "g1_tfidfs[('X', 1)]" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "import pickle\n", "\n", "# Serialize wordDfs to 'wordDfs.pickle' (path is relative to the working directory).\n", "with open('wordDfs.pickle', mode='wb') as pickle_file:\n", "    pickle.dump(wordDfs, pickle_file)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.5" } }, "nbformat": 4, "nbformat_minor": 2 }