{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Enter the data directory only once: without this guard, re-running the\n",
    "# cell would try to descend into ./data/data and fail.\n",
    "if os.path.basename(os.getcwd()) != 'data':\n",
    "    os.chdir('./data')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from scipy import stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# The four (x, y) datasets of Anscombe's quartet.\n",
    "x1 = np.array([10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0])\n",
    "y1 = np.array([8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68])\n",
    "\n",
    "x2 = np.array([10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0])\n",
    "y2 = np.array([9.14, 8.14, 8.74, 8.77, 9.26, 8.10, 6.13, 3.10, 9.13, 7.26, 4.74])\n",
    "\n",
    "x3 = np.array([10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0])\n",
    "y3 = np.array([7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73])\n",
    "\n",
    "x4 = np.array([8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 19.0, 8.0, 8.0, 8.0])\n",
    "y4 = np.array([6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.50, 5.56, 7.91, 6.89])\n",
    "\n",
    "# x range shared by every panel; the fitted lines are drawn across it.\n",
    "limit=np.array([0,20])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7.50090909091 7.50090909091 7.5 7.50090909091\n",
      "3.75206280992 3.75239008264 3.74783636364 3.74840826446\n"
     ]
    }
   ],
   "source": [
    "xs = [x1, x2, x3, x4]\n",
    "ys = [y1, y2, y3, y4]\n",
    "\n",
    "mx1, mx2, mx3, mx4 = [np.mean(x) for x in xs]\n",
    "my1, my2, my3, my4 = [np.mean(y) for y in ys]\n",
    "\n",
    "# np.var defaults to ddof=0, i.e. the population variance.\n",
    "vx1, vx2, vx3, vx4 = [np.var(x) for x in xs]\n",
    "vy1, vy2, vy3, vy4 = [np.var(y) for y in ys]\n",
    "\n",
    "# Least-squares fit of every dataset; the first two fields of the result\n",
    "# are the slope and the intercept.\n",
    "fits = [stats.linregress(x, y) for x, y in zip(xs, ys)]\n",
    "\n",
    "# A straight line only needs its two end points, so each fitted line is\n",
    "# evaluated at the plot limits only.\n",
    "line1, line2, line3, line4 = [fit[0]*limit + fit[1] for fit in fits]\n",
    "\n",
    "print(my1,my2,my3,my4)\n",
    "print(vy1,vy2,vy3,vy4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "IOPub data rate exceeded.\n",
      "The notebook server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--NotebookApp.iopub_data_rate_limit`.\n"
     ]
    }
   ],
   "source": [
    "import plotly.plotly as py\n",
    "from plotly import __version__\n",
    "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n",
    "import plotly.graph_objs as go\n",
    "\n",
    "init_notebook_mode()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "This is the format of your plot grid:\n",
      "[ (1,1) x1,y1 ] [ (1,2) x2,y2 ]\n",
      "[ (2,1) x3,y3 ] [ (2,2) x4,y4 ]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import plotly.plotly as py\n",
    "import plotly.graph_objs as go\n",
    "from plotly import tools\n",
    "\n",
    "# (legend name, colour, x, y, fitted line) of every panel, in reading order.\n",
    "quartet = [\n",
    "    ('Group1', 'red', x1, y1, line1),\n",
    "    ('Group2', 'orange', x2, y2, line2),\n",
    "    ('Group3', 'green', x3, y3, line3),\n",
    "    ('Group4', 'blue', x4, y4, line4),\n",
    "]\n",
    "\n",
    "fig = tools.make_subplots(rows=2, cols=2)\n",
    "for k, (group, color, x, y, line) in enumerate(quartet):\n",
    "    # Fill the 2x2 grid row by row so that Group k lands on axes xk/yk,\n",
    "    # as in the grid printed by make_subplots (Group2 top-right,\n",
    "    # Group3 bottom-left).\n",
    "    row, col = k // 2 + 1, k % 2 + 1\n",
    "    fig.append_trace(\n",
    "        go.Scatter(x=x, y=y, mode='markers', name=group,\n",
    "                   marker=dict(color=color)),\n",
    "        row, col)\n",
    "    fig.append_trace(\n",
    "        go.Scatter(x=limit, y=line, mode='lines', name='l%d' % (k + 1),\n",
    "                   line=dict(color=color)),\n",
    "        row, col)\n",
    "\n",
    "    # Same axis ranges everywhere, so the four panels can be compared.\n",
    "    fig['layout']['xaxis%d' % (k + 1)].update(range=[0,20])\n",
    "    fig['layout']['yaxis%d' % (k + 1)].update(range=[0,15])\n",
    "\n",
    "fig['layout']['title']=\"Anscombe's quartet\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'file:///Users/ruffo/Documents/Didattica/2017:18/ComplexNetworks/dataviz/notebooks/data/temp-plot.html'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": 
"execute_result"
    }
   ],
   "source": [
    "plot(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Read economics.csv and use its 'date' column as the row index.\n",
    "economics = pd.read_csv(\"economics.csv\", index_col='date')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0pcepoppsavertuempmedunemployyearmonth
date
1967-07-011507.419871212.54.5294419677
1967-08-012510.519891112.54.7294519678
1967-09-013516.319911311.74.6295819679
1967-10-014512.919931112.54.93143196710
1967-11-015518.119949812.54.73066196711
\n", "
"
      ],
      "text/plain": [
       " Unnamed: 0 pce pop psavert uempmed unemploy year month\n",
       "date \n",
       "1967-07-01 1 507.4 198712 12.5 4.5 2944 1967 7\n",
       "1967-08-01 2 510.5 198911 12.5 4.7 2945 1967 8\n",
       "1967-09-01 3 516.3 199113 11.7 4.6 2958 1967 9\n",
       "1967-10-01 4 512.9 199311 12.5 4.9 3143 1967 10\n",
       "1967-11-01 5 518.1 199498 12.5 4.7 3066 1967 11"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "economics.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Drop the leftover row counter by name, not by position: a positional\n",
    "# drop deletes one more real column ('pce', then 'pop', ...) every time\n",
    "# the cell is run again. errors='ignore' makes a second run a no-op.\n",
    "economics = economics.drop('Unnamed: 0', axis=1, errors='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pcepoppsavertuempmedunemployyearmonth
date
1967-07-01507.419871212.54.5294419677
1967-08-01510.519891112.54.7294519678
1967-09-01516.319911311.74.6295819679
1967-10-01512.919931112.54.93143196710
1967-11-01518.119949812.54.73066196711
\n", "
"
      ],
      "text/plain": [
       " pce pop psavert uempmed unemploy year month\n",
       "date \n",
       "1967-07-01 507.4 198712 12.5 4.5 2944 1967 7\n",
       "1967-08-01 510.5 198911 12.5 4.7 2945 1967 8\n",
       "1967-09-01 516.3 199113 11.7 4.6 2958 1967 9\n",
       "1967-10-01 512.9 199311 12.5 4.9 3143 1967 10\n",
       "1967-11-01 518.1 199498 12.5 4.7 3066 1967 11"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "economics.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def titled_layout(title, x_title='Year', y_title='Personal savings rate', **extra):\n",
    "    \"\"\"Build the layout shared by the economics charts.\n",
    "\n",
    "    title   -- figure title\n",
    "    x_title -- label of the x axis\n",
    "    y_title -- label of the y axis\n",
    "    extra   -- further go.Layout keywords, e.g. showlegend=False\n",
    "    \"\"\"\n",
    "    axis_font = dict(family='Courier New, monospace', size=18, color='#7f7f7f')\n",
    "    return go.Layout(\n",
    "        title=title,\n",
    "        # each axis gets its own copy of the font settings\n",
    "        xaxis=dict(title=x_title, titlefont=dict(axis_font)),\n",
    "        yaxis=dict(title=y_title, titlefont=dict(axis_font)),\n",
    "        **extra\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'file:///Users/ruffo/Documents/Didattica/2017:18/ComplexNetworks/dataviz/notebooks/data/temp-plot.html'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scatter plot: one marker per monthly observation, placed at its year.\n",
    "trace = go.Scatter(x=economics.year, y=economics.psavert, mode='markers')\n",
    "data = [trace]\n",
    "layout = titled_layout('Scatter plot')\n",
    "fig = go.Figure(data=data,layout=layout)\n",
    "plot(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'file:///Users/ruffo/Documents/Didattica/2017:18/ComplexNetworks/dataviz/notebooks/data/temp-plot.html'"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Line chart over the date index.\n",
    "trace = go.Scatter(x=economics.index, y=economics.psavert)\n",
    "data = [trace]\n",
    "layout = titled_layout('Line chart')\n",
    "fig = go.Figure(data=data,layout=layout)\n",
    "plot(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'file:///Users/ruffo/Documents/Didattica/2017:18/ComplexNetworks/dataviz/notebooks/data/temp-plot.html'"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Area chart: with a single trace there is no 'next' trace to fill to,\n",
    "# so state the fill down to y=0 explicitly.\n",
    "trace = go.Scatter(x=economics.index, y=economics.psavert, fill='tozeroy')\n",
    "data = [trace]\n",
    "layout = titled_layout('Area chart')\n",
    "fig = go.Figure(data=data,layout=layout)\n",
    "plot(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'file:///Users/ruffo/Documents/Didattica/2017:18/ComplexNetworks/dataviz/notebooks/data/temp-plot.html'"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One box per year, summarising that year's monthly savings rates.\n",
    "traces = [\n",
    "    go.Box(y=rows.psavert, name=str(year), line=dict(color='blue'))\n",
    "    for year, rows in economics.groupby('year')\n",
    "]\n",
    "layout = titled_layout('Boxplot', showlegend=False)\n",
    "fig = go.Figure(data=traces,layout=layout)\n",
    "\n",
    "plot(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'file:///Users/ruffo/Documents/Didattica/2017:18/ComplexNetworks/dataviz/notebooks/data/temp-plot.html'"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One bar per year: plotting the raw monthly rows would put up to twelve\n",
    "# bars at the same x position, hiding each other.\n",
    "yearly_mean = economics.groupby('year').psavert.mean()\n",
    "data = [go.Bar(x=yearly_mean.index, y=yearly_mean.values)]\n",
    "layout = titled_layout('Barplot', y_title='Personal savings rate (yearly mean)')\n",
    "fig=go.Figure(data=data,layout=layout)\n",
    "plot(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'file:///Users/ruffo/Documents/Didattica/2017:18/ComplexNetworks/dataviz/notebooks/data/temp-plot.html'"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One line per year divisible by five, month by month.\n",
    "data = [\n",
    "    go.Scatter(x=rows.month, y=rows.psavert, name=str(year))\n",
    "    for year, rows in economics.groupby('year')\n",
    "    if year % 5 == 0\n",
    "]\n",
    "# The x values are months, so label the axis accordingly.\n",
    "layout = titled_layout('Multiline chart', x_title='Month')\n",
    "fig=go.Figure(data=data,layout=layout)\n",
    "\n",
    "plot(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
"outputs": [], "source": [ "traces=[]\n", "for y in range(economics.year.min(),economics.year.max()+1):\n", " if y%5==0:\n", " anno=economics[economics.year==y]\n", " traces.append(go.Scatter(\n", " x=anno.month,\n", " y=anno.psavert,\n", " name=y,\n", " mode='lines'\n", "))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "This is the format of your plot grid:\n", "[ (1,1) x1,y1 ] [ (1,2) x2,y2 ] \n", "[ (2,1) x3,y3 ] [ (2,2) x4,y4 ] \n", "[ (3,1) x5,y5 ] [ (3,2) x6,y6 ] \n", "[ (4,1) x7,y7 ] [ (4,2) x8,y8 ] \n", "[ (5,1) x9,y9 ] [ (5,2) x10,y10 ]\n", "\n" ] } ], "source": [ "n=len(traces)\n", "fig = tools.make_subplots(rows=int(n/2), cols=2)\n", "for i in range(1,n+1):\n", " if i<= n/2:\n", " fig.append_trace(traces[i-1], i, 1)\n", " else:\n", " fig.append_trace(traces[i-1], i-int(n/2), 2)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'file:///Users/ruffo/Documents/Didattica/2017:18/ComplexNetworks/dataviz/notebooks/data/temp-plot.html'" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fig['layout']['yaxis1'].update(range=[0,20],title=\"psa\")\n", "fig['layout']['yaxis2'].update(range=[0,20],title=\"psa\")\n", "fig['layout']['yaxis3'].update(range=[0,20],title=\"psa\")\n", "fig['layout']['yaxis4'].update(range=[0,20],title=\"psa\")\n", "fig['layout']['yaxis5'].update(range=[0,20],title=\"psa\")\n", "fig['layout']['yaxis6'].update(range=[0,20],title=\"psa\")\n", "fig['layout']['yaxis7'].update(range=[0,20],title=\"psa\")\n", "fig['layout']['yaxis8'].update(range=[0,20],title=\"psa\")\n", "fig['layout']['yaxis9'].update(range=[0,20],title=\"psa\")\n", "fig['layout']['yaxis10'].update(range=[0,20],title=\"psa\")\n", "\n", "fig['layout']['xaxis1'].update(title=\"year\")\n", "fig['layout']['xaxis2'].update(title=\"year\")\n", 
"fig['layout']['xaxis3'].update(title=\"year\")\n", "fig['layout']['xaxis4'].update(title=\"year\")\n", "fig['layout']['xaxis5'].update(title=\"year\")\n", "fig['layout']['xaxis6'].update(title=\"year\")\n", "fig['layout']['xaxis7'].update(title=\"year\")\n", "fig['layout']['xaxis8'].update(title=\"year\")\n", "fig['layout']['xaxis9'].update(title=\"year\")\n", "fig['layout']['xaxis10'].update(title=\"year\")\n", "fig['layout']['title']=\"Separated subplots\"\n", "\n", "\n", "plot(fig)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": true }, "outputs": [], "source": [ "movies = pd.read_csv(\"movies.csv\")" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0titleyearlengthbudgetratingvotesr1r2r3...r9r10mpaaActionAnimationComedyDramaDocumentaryRomanceShort
01$1971121NaN6.43484.54.54.5...4.54.5NaN0011000
12$1000 a Touchdown193971NaN6.0200.014.54.5...4.514.5NaN0010000
23$21 a Day Once a Month19417NaN8.250.00.00.0...24.524.5NaN0100001
34$40,000199670NaN8.2614.50.00.0...34.545.5NaN0010000
45$50,000 Climax Show, The197571NaN3.41724.54.50.0...0.024.5NaN0000000
\n", "

5 rows × 25 columns

\n", "
"
      ],
      "text/plain": [
       " Unnamed: 0 title year length budget rating votes \\\n",
       "0 1 $ 1971 121 NaN 6.4 348 \n",
       "1 2 $1000 a Touchdown 1939 71 NaN 6.0 20 \n",
       "2 3 $21 a Day Once a Month 1941 7 NaN 8.2 5 \n",
       "3 4 $40,000 1996 70 NaN 8.2 6 \n",
       "4 5 $50,000 Climax Show, The 1975 71 NaN 3.4 17 \n",
       "\n",
       " r1 r2 r3 ... r9 r10 mpaa Action Animation Comedy Drama \\\n",
       "0 4.5 4.5 4.5 ... 4.5 4.5 NaN 0 0 1 1 \n",
       "1 0.0 14.5 4.5 ... 4.5 14.5 NaN 0 0 1 0 \n",
       "2 0.0 0.0 0.0 ... 24.5 24.5 NaN 0 1 0 0 \n",
       "3 14.5 0.0 0.0 ... 34.5 45.5 NaN 0 0 1 0 \n",
       "4 24.5 4.5 0.0 ... 0.0 24.5 NaN 0 0 0 0 \n",
       "\n",
       " Documentary Romance Short \n",
       "0 0 0 0 \n",
       "1 0 0 0 \n",
       "2 0 0 1 \n",
       "3 0 0 0 \n",
       "4 0 0 0 \n",
       "\n",
       "[5 rows x 25 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Drop the leftover row counter by name, not by position: a positional\n",
    "# drop deletes one more real column ('title', then 'year', ...) every\n",
    "# time the cell is run again. errors='ignore' makes a second run a no-op.\n",
    "movies = movies.drop('Unnamed: 0', axis=1, errors='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titleyearlengthbudgetratingvotesr1r2r3r4...r9r10mpaaActionAnimationComedyDramaDocumentaryRomanceShort
0$1971121NaN6.43484.54.54.54.5...4.54.5NaN0011000
1$1000 a Touchdown193971NaN6.0200.014.54.524.5...4.514.5NaN0010000
2$21 a Day Once a Month19417NaN8.250.00.00.00.0...24.524.5NaN0100001
3$40,000199670NaN8.2614.50.00.00.0...34.545.5NaN0010000
4$50,000 Climax Show, The197571NaN3.41724.54.50.014.5...0.024.5NaN0000000
\n", "

5 rows × 24 columns

\n", "
"
      ],
      "text/plain": [
       " title year length budget rating votes r1 r2 \\\n",
       "0 $ 1971 121 NaN 6.4 348 4.5 4.5 \n",
       "1 $1000 a Touchdown 1939 71 NaN 6.0 20 0.0 14.5 \n",
       "2 $21 a Day Once a Month 1941 7 NaN 8.2 5 0.0 0.0 \n",
       "3 $40,000 1996 70 NaN 8.2 6 14.5 0.0 \n",
       "4 $50,000 Climax Show, The 1975 71 NaN 3.4 17 24.5 4.5 \n",
       "\n",
       " r3 r4 ... r9 r10 mpaa Action Animation Comedy Drama \\\n",
       "0 4.5 4.5 ... 4.5 4.5 NaN 0 0 1 1 \n",
       "1 4.5 24.5 ... 4.5 14.5 NaN 0 0 1 0 \n",
       "2 0.0 0.0 ... 24.5 24.5 NaN 0 1 0 0 \n",
       "3 0.0 0.0 ... 34.5 45.5 NaN 0 0 1 0 \n",
       "4 0.0 14.5 ... 0.0 24.5 NaN 0 0 0 0 \n",
       "\n",
       " Documentary Romance Short \n",
       "0 0 0 0 \n",
       "1 0 0 0 \n",
       "2 0 0 1 \n",
       "3 0 0 0 \n",
       "4 0 0 0 \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def histogram_layout():\n",
    "    \"\"\"Return the title and axis labels shared by the rating histograms.\"\"\"\n",
    "    return go.Layout(\n",
    "        title='Histogram',\n",
    "        xaxis=dict(title='Rating'),\n",
    "        yaxis=dict(title='Count'),\n",
    "    )\n",
    "\n",
    "\n",
    "def rating_pie(labels, values, hole=0):\n",
    "    \"\"\"Return a pie-chart figure of `values`, one slice per label.\n",
    "\n",
    "    labels -- slice names\n",
    "    values -- slice sizes, in the same order as `labels`\n",
    "    hole   -- fraction of the radius cut out of the centre; 0 is a full\n",
    "              pie, anything larger gives a donut\n",
    "    \"\"\"\n",
    "    pie = go.Pie(\n",
    "        labels=labels, values=values,\n",
    "        hoverinfo='label+percent', textinfo='value',\n",
    "        textfont=dict(size=20),\n",
    "        hole=hole,\n",
    "        marker=dict(line=dict(color='#000000', width=2)),\n",
    "    )\n",
    "    return go.Figure(data=[pie], layout=go.Layout(title='Pie'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'file:///Users/ruffo/Documents/Didattica/2017:18/ComplexNetworks/dataviz/notebooks/data/temp-plot.html'"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Coarse view of the rating distribution.\n",
    "data = [go.Histogram(x=movies.rating, nbinsx=20)]\n",
    "fig=go.Figure(data=data,layout=histogram_layout())\n",
    "plot(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'file:///Users/ruffo/Documents/Didattica/2017:18/ComplexNetworks/dataviz/notebooks/data/temp-plot.html'"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same distribution with a five times larger bin budget.\n",
    "data = [go.Histogram(x=movies.rating, nbinsx=100)]\n",
    "fig=go.Figure(data=data,layout=histogram_layout())\n",
    "plot(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Keep only the movies that have an MPAA class, ordered by rating.\n",
    "mpa = movies[movies.mpaa.notnull()].sort_values(['rating'])\n",
    "# A sorted list instead of a bare set: the iteration order of a set of\n",
    "# strings can change between kernel sessions, which would reshuffle the\n",
    "# traces, the legend and the pie slices from run to run.\n",
    "mpa_values = sorted(set(mpa.mpaa))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# One histogram trace per MPAA class.\n",
    "data = [\n",
    "    go.Histogram(x=mpa[mpa.mpaa == mpaa_class].rating, name=mpaa_class)\n",
    "    for mpaa_class in mpa_values\n",
    "]\n",
    "layout = histogram_layout()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'file:///Users/ruffo/Documents/Didattica/2017:18/ComplexNetworks/dataviz/notebooks/data/temp-plot.html'"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fig = go.Figure(data=data,layout=layout)\n",
    "# Stack the per-class bars instead of placing them side by side.\n",
    "fig[\"layout\"][\"barmode\"]='stack'\n",
    "plot(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['NC-17', 'PG', 'PG-13', 'R']"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "labels = list(mpa_values)\n",
    "labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[16, 528, 1003, 3377]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of movies per MPAA class, in the same order as `labels`.\n",
    "values = [int((mpa.mpaa == label).sum()) for label in labels]\n",
    "values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "fig = rating_pie(labels, values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'file:///Users/ruffo/Documents/Didattica/2017:18/ComplexNetworks/dataviz/notebooks/data/temp-plot.html'"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "plot(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Same chart drawn as a donut.\n",
    "fig = rating_pie(labels, values, hole=0.7)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'file:///Users/ruffo/Documents/Didattica/2017:18/ComplexNetworks/dataviz/notebooks/data/temp-plot.html'"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "plot(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}