Python: two examples

I use the conda py3 environment

~$ conda activate py3

Version of the installed module

~$ pip show pandas_datareader
Name: pandas-datareader
Version: 0.9.0
Summary: Data readers extracted from the pandas codebase, should be compatible with recent pandas versions
Home-page: https://github.com/pydata/pandas-datareader
Author: The PyData Development Team
Author-email: pydata@googlegroups.com
License: BSD License
Location: /home/user/miniconda3/envs/py3/lib/python3.7/site-packages
Requires: requests, pandas, lxml
Required-by: 
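
The same check can also be done from inside Python (a quick sketch; assumes the module imports cleanly and exposes __version__, as recent releases do):

import pandas_datareader as pdr
print(pdr.__version__)  # should print something like 0.9.0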

Contents

Two labs

  1. 911 Calls Project
  2. Finance Project

911 Calls Project

Kaggle data

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
# load the data into a dataframe
df = pd.read_csv('911.csv')
df.head()
lat lng desc zip title timeStamp twp addr e
0 40.297876 -75.581294 REINDEER CT & DEAD END; NEW HANOVER; Station ... 19525.0 EMS: BACK PAINS/INJURY 2015-12-10 17:40:00 NEW HANOVER REINDEER CT & DEAD END 1
1 40.258061 -75.264680 BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP... 19446.0 EMS: DIABETIC EMERGENCY 2015-12-10 17:40:00 HATFIELD TOWNSHIP BRIAR PATH & WHITEMARSH LN 1
2 40.121182 -75.351975 HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St... 19401.0 Fire: GAS-ODOR/LEAK 2015-12-10 17:40:00 NORRISTOWN HAWS AVE 1
3 40.116153 -75.343513 AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;... 19401.0 EMS: CARDIAC EMERGENCY 2015-12-10 17:40:01 NORRISTOWN AIRY ST & SWEDE ST 1
4 40.251492 -75.603350 CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S... NaN EMS: DIZZINESS 2015-12-10 17:40:01 LOWER POTTSGROVE CHERRYWOOD CT & DEAD END 1
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99492 entries, 0 to 99491
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   lat        99492 non-null  float64
 1   lng        99492 non-null  float64
 2   desc       99492 non-null  object 
 3   zip        86637 non-null  float64
 4   title      99492 non-null  object 
 5   timeStamp  99492 non-null  object 
 6   twp        99449 non-null  object 
 7   addr       98973 non-null  object 
 8   e          99492 non-null  int64  
dtypes: float64(3), int64(1), object(5)
memory usage: 6.8+ MB
# top 5 zip codes for 911 calls
df['zip'].value_counts().head()
19401.0    6979
19464.0    6643
19403.0    4854
19446.0    4748
19406.0    3174
Name: zip, dtype: int64
# top 5 townships (twp) for 911 calls
df['twp'].value_counts().head()
LOWER MERION    8443
ABINGTON        5977
NORRISTOWN      5890
UPPER MERION    5227
CHELTENHAM      4575
Name: twp, dtype: int64
# unique title codes
# len(df['title'].unique())
df['title'].nunique()
110
# 'Reasons/Departments' extraction from title
df['Reason'] = df['title'].apply(lambda x: x.split(':')[0])
# frequency table of 'Reasons/Departments'
df['Reason'].value_counts()
EMS        48877
Traffic    35695
Fire       14920
Name: Reason, dtype: int64
# countplot with seaborn
sns.countplot(x='Reason', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7ff700edb9d0>

png

# timeStamp
df['timeStamp'][0]
'2015-12-10 17:40:00'
# data type of the objects in the timeStamp column
type(df['timeStamp'][0])
str
# convert timeStamp string to DateTime
df['timeStamp'] = pd.to_datetime(df['timeStamp'])
df['timeStamp'][0]
Timestamp('2015-12-10 17:40:00')
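
Alternatively (a minimal sketch), the conversion can be done directly when loading the CSV:

# parse the timestamp column at load time instead of converting afterwards
df = pd.read_csv('911.csv', parse_dates=['timeStamp'])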
# assign the first observation, so I can use shift+tab in the next cell to inspect its methods
ora = df['timeStamp'][0]
# extract the hour
ora.hour
17
# build hour, month and day-of-week variables
df['Hour'] = df['timeStamp'].apply(lambda x: x.hour)
df['Month'] = df['timeStamp'].apply(lambda x: x.month)
df['Day of Week'] = df['timeStamp'].apply(lambda x: x.dayofweek)
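
The same three columns can also be built with the vectorized .dt accessor (an equivalent sketch):

# vectorized equivalent of the apply calls above
df['Hour'] = df['timeStamp'].dt.hour
df['Month'] = df['timeStamp'].dt.month
df['Day of Week'] = df['timeStamp'].dt.dayofweek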
# map the day-of-week number to the day name
dmap = {0:'Mon',1:'Tue',2:'Wed',3:'Thu',4:'Fri',5:'Sat',6:'Sun'}
df['Day of Week'] = df['Day of Week'].map(dmap)
# countplot by day of week
sns.countplot(x='Day of Week',data=df,hue='Reason',palette='viridis')
# plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
<matplotlib.legend.Legend at 0x7ff6fffce450>

png

# countplot by month
sns.countplot(x='Month',data=df,hue='Reason',palette='viridis')
# plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
<matplotlib.legend.Legend at 0x7ff6ff24ea50>

png

# months 9, 10 and 11 are missing
# note: missing values are present; don't use columns with missing values for the counts
df.isnull().sum()
lat                0
lng                0
desc               0
zip            12855
title              0
timeStamp          0
twp               43
addr             519
e                  0
Reason             0
Hour               0
Month              0
Day of Week        0
dtype: int64
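
To look at the share of missing values instead of the raw counts (a small optional sketch):

# fraction of missing values per column, as a percentage
df.isnull().mean() * 100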
# again: missing values are present; don't use columns with missing values for the counts
df.isnull().any()
lat            False
lng            False
desc           False
zip             True
title          False
timeStamp      False
twp             True
addr            True
e              False
Reason         False
Hour           False
Month          False
Day of Week    False
dtype: bool
# groupby month
byMonth = df.groupby(by='Month').count()
byMonth.head()
lat lng desc zip title timeStamp twp addr e Reason Hour Day of Week
Month
1 13205 13205 13205 11527 13205 13205 13203 13096 13205 13205 13205 13205
2 11467 11467 11467 9930 11467 11467 11465 11396 11467 11467 11467 11467
3 11101 11101 11101 9755 11101 11101 11092 11059 11101 11101 11101 11101
4 11326 11326 11326 9895 11326 11326 11323 11283 11326 11326 11326 11326
5 11423 11423 11423 9946 11423 11423 11420 11378 11423 11423 11423 11423
# line plot of the monthly counts; the drop caused by the missing data is evident
plt.figure(figsize=(4,2),dpi=100)
plt.style.use('default')
byMonth['lat'].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7ff703e0eed0>

png

# count plot; the drop caused by the missing data is evident
plt.figure(figsize=(4,2),dpi=100)
sns.countplot(x='Month',data=df,palette='viridis')
<matplotlib.axes._subplots.AxesSubplot at 0x7ff6ff5d5c50>

png

# linear fit of calls per month
# turn the Month index into a column so it can be used as the x variable
sns.lmplot(x='Month',y='twp',data=byMonth.reset_index())
<seaborn.axisgrid.FacetGrid at 0x7ff6ff5a60d0>

png

# create a Date column from timeStamp
df['Date'] = df['timeStamp'].apply(lambda x: x.date())
df['Date'].head()
0    2015-12-10
1    2015-12-10
2    2015-12-10
3    2015-12-10
4    2015-12-10
Name: Date, dtype: object
# line plot over time
plt.figure(figsize=(8,2),dpi=100)
plt.style.use('ggplot') # plt.style.use('default')
df.groupby(by='Date').count()['twp'].plot()
plt.tight_layout()

png

# one plot per Reason
fig,axes = plt.subplots(nrows=3,ncols=1,figsize=(8,6))

axes[0].plot(df[df['Reason']=='EMS'].groupby(by='Date').count()['twp'])
axes[0].set_title('EMS')

axes[1].plot(df[df['Reason']=='Fire'].groupby(by='Date').count()['twp'])
axes[1].set_title('Fire')

axes[2].plot(df[df['Reason']=='Traffic'].groupby(by='Date').count()['twp'])
axes[2].set_title('Traffic')

plt.tight_layout()

png

# heatmap: columns Hour & index Day of Week (prepare data - method 1)
dayHour = df[['Day of Week','Hour']].pivot_table(index='Day of Week',columns='Hour',values='Day of Week',aggfunc=len)
dayHour
Hour 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23
Day of Week
Fri 275 235 191 175 201 194 372 598 742 752 ... 932 980 1039 980 820 696 667 559 514 474
Mon 282 221 201 194 204 267 397 653 819 786 ... 869 913 989 997 885 746 613 497 472 325
Sat 375 301 263 260 224 231 257 391 459 640 ... 789 796 848 757 778 696 628 572 506 467
Sun 383 306 286 268 242 240 300 402 483 620 ... 684 691 663 714 670 655 537 461 415 330
Thu 278 202 233 159 182 203 362 570 777 828 ... 876 969 935 1013 810 698 617 553 424 354
Tue 269 240 186 170 209 239 415 655 889 880 ... 943 938 1026 1019 905 731 647 571 462 274
Wed 250 216 189 209 156 255 410 701 875 808 ... 904 867 990 1037 894 686 668 575 490 335

7 rows × 24 columns

# heatmap: columns Hour & index Day of Week (prepare data - method 2)
dayHour = df.groupby(by=['Day of Week','Hour']).count()['Reason'].unstack(level=-1)
dayHour
Hour 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23
Day of Week
Fri 275 235 191 175 201 194 372 598 742 752 ... 932 980 1039 980 820 696 667 559 514 474
Mon 282 221 201 194 204 267 397 653 819 786 ... 869 913 989 997 885 746 613 497 472 325
Sat 375 301 263 260 224 231 257 391 459 640 ... 789 796 848 757 778 696 628 572 506 467
Sun 383 306 286 268 242 240 300 402 483 620 ... 684 691 663 714 670 655 537 461 415 330
Thu 278 202 233 159 182 203 362 570 777 828 ... 876 969 935 1013 810 698 617 553 424 354
Tue 269 240 186 170 209 239 415 655 889 880 ... 943 938 1026 1019 905 731 647 571 462 274
Wed 250 216 189 209 156 255 410 701 875 808 ... 904 867 990 1037 894 686 668 575 490 335

7 rows × 24 columns
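
A third equivalent way to prepare the same table (a quick sketch) is pd.crosstab:

# counts of calls by day of week (rows) and hour (columns)
dayHour = pd.crosstab(index=df['Day of Week'], columns=df['Hour'])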

# heatmap: columns Hour & index Day of Week
plt.figure(figsize=(5,5))
sns.heatmap(dayHour,cmap='magma',linecolor='white',linewidths=1)
<matplotlib.axes._subplots.AxesSubplot at 0x7ff6ff321a90>

png

# clustermap: columns Hour & index Day of Week
sns.clustermap(dayHour,cmap='viridis',linecolor='black',linewidths=1,figsize=(5,5))
<seaborn.matrix.ClusterGrid at 0x7ff6feffc690>

png

# heatmap: columns Month & index Day of Week
dayMonth = df.groupby(by=['Day of Week','Month']).count()['Reason'].unstack(level=-1)
plt.figure(figsize=(5,5))
sns.heatmap(dayMonth,cmap='magma',linecolor='white',linewidths=1)
<matplotlib.axes._subplots.AxesSubplot at 0x7ff6fe48b310>

png

# clustermap: columns Month & index Day of Week
sns.clustermap(dayMonth,cmap='viridis',linecolor='black',linewidths=1,figsize=(5,5))
<seaborn.matrix.ClusterGrid at 0x7ff6fe3ba150>

png

Finance Project

Read stock data with pandas-datareader (the code below uses the Yahoo Finance reader)
Documentation: pandas-datareader remote_data

from pandas_datareader import data, wb

import pandas as pd
import numpy as np
import datetime

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

Added on 16/04/2021
For downloading market indices, yfinance comes in handy: it offers granularity down to one minute, but with a maximum download window of 7 days.

import yfinance as yf
import datetime
FTSEMIB = yf.download(tickers="FTSEMIB.MI", 
            start=datetime.datetime(2021, 4, 14), 
            end=datetime.datetime(2021, 4, 16), 
            interval="1m").sort_values(by='Datetime', ascending=False).head()

NASDAQ = yf.download(tickers="^IXIC", 
            start=datetime.datetime(2021, 4, 14), 
            end=datetime.datetime(2021, 4, 16), 
            interval="1m").sort_values(by='Datetime', ascending=False).head()

BITCOIN_USD = yf.download(tickers="BTC-USD", 
                          start=datetime.datetime(2021, 4, 14), 
                          end=datetime.datetime(2021, 4, 16), 
                          interval="1m").sort_values(by='Datetime', ascending=False).head()

TESLA = yf.download(tickers="TSLA", 
                    start=datetime.datetime(2021, 4, 14), 
                    end=datetime.datetime(2021, 4, 16), 
                    interval="1m").sort_values(by='Datetime', ascending=False).head()

Back to the lab...

# download data
start = datetime.datetime(2006, 1, 1)
end = datetime.datetime(2016, 1, 1)
BAC = data.DataReader('BAC', 'yahoo', start, end) # Bank of America
C = data.DataReader('C', 'yahoo', start, end)     # CitiGroup
GS = data.DataReader('GS', 'yahoo', start, end)   # Goldman Sachs
JPM = data.DataReader('JPM', 'yahoo', start, end) # JPMorgan Chase
MS = data.DataReader('MS', 'yahoo', start, end)   # Morgan Stanley
WFC = data.DataReader('WFC', 'yahoo', start, end) # Wells Fargo

# to download them all in one command
# df = data.DataReader(['BAC', 'C', 'GS', 'JPM', 'MS', 'WFC'],'yahoo', start, end)

# in case the download doesn't work (n.b. in that file CitiGroup has an anomalous, oversized 'Stock Split' value)
# df = pd.read_pickle('all_banks')
# sorted list of bank tickers
tickers = sorted(['C', 'BAC','JPM','GS','MS','WFC'])
tickers
['BAC', 'C', 'GS', 'JPM', 'MS', 'WFC']
# build a single dataframe
bank_stocks = pd.concat([BAC, C, GS, JPM, MS, WFC],axis=1,keys=tickers)
bank_stocks.head()
BAC C ... MS WFC
High Low Open Close Volume Adj Close High Low Open Close ... Open Close Volume Adj Close High Low Open Close Volume Adj Close
Date
2006-01-03 47.180000 46.150002 46.919998 47.080002 16296700.0 35.054218 493.799988 481.100006 490.000000 492.899994 ... 57.169998 58.310001 5377000.0 37.399338 31.975000 31.195000 31.600000 31.900000 11016400.0 20.408512
2006-01-04 47.240002 46.450001 47.000000 46.580002 17757900.0 34.681938 491.000000 483.500000 488.600006 483.799988 ... 58.700001 58.349998 7977800.0 37.424999 31.820000 31.365000 31.799999 31.530001 10870000.0 20.171795
2006-01-05 46.830002 46.320000 46.580002 46.639999 14970700.0 34.726604 487.799988 484.000000 484.399994 486.200012 ... 58.549999 58.509998 5778000.0 37.527611 31.555000 31.309999 31.500000 31.495001 10158000.0 20.149401
2006-01-06 46.910000 46.349998 46.799999 46.570000 12599800.0 34.674480 489.000000 482.000000 488.799988 486.200012 ... 58.770000 58.570000 6889800.0 37.566090 31.775000 31.385000 31.580000 31.680000 8403800.0 20.267757
2006-01-09 46.970001 46.360001 46.720001 46.599998 15619400.0 34.696808 487.399994 483.000000 486.000000 483.899994 ... 58.630001 59.189999 4144500.0 37.963749 31.825001 31.555000 31.674999 31.674999 5619600.0 20.264565

5 rows × 36 columns

# set the column level names of the multilevel dataframe
bank_stocks.columns.names = ['Bank Ticker','Stock Info']
bank_stocks.columns.names
FrozenList(['Bank Ticker', 'Stock Info'])

EDA (Exploratory Data Analysis)

Documentation: Multi-Level Indexing and Cross Section

bank_stocks.head()
Bank Ticker BAC C ... MS WFC
Stock Info High Low Open Close Volume Adj Close High Low Open Close ... Open Close Volume Adj Close High Low Open Close Volume Adj Close
Date
2006-01-03 47.180000 46.150002 46.919998 47.080002 16296700.0 35.054218 493.799988 481.100006 490.000000 492.899994 ... 57.169998 58.310001 5377000.0 37.399338 31.975000 31.195000 31.600000 31.900000 11016400.0 20.408512
2006-01-04 47.240002 46.450001 47.000000 46.580002 17757900.0 34.681938 491.000000 483.500000 488.600006 483.799988 ... 58.700001 58.349998 7977800.0 37.424999 31.820000 31.365000 31.799999 31.530001 10870000.0 20.171795
2006-01-05 46.830002 46.320000 46.580002 46.639999 14970700.0 34.726604 487.799988 484.000000 484.399994 486.200012 ... 58.549999 58.509998 5778000.0 37.527611 31.555000 31.309999 31.500000 31.495001 10158000.0 20.149401
2006-01-06 46.910000 46.349998 46.799999 46.570000 12599800.0 34.674480 489.000000 482.000000 488.799988 486.200012 ... 58.770000 58.570000 6889800.0 37.566090 31.775000 31.385000 31.580000 31.680000 8403800.0 20.267757
2006-01-09 46.970001 46.360001 46.720001 46.599998 15619400.0 34.696808 487.399994 483.000000 486.000000 483.899994 ... 58.630001 59.189999 4144500.0 37.963749 31.825001 31.555000 31.674999 31.674999 5619600.0 20.264565

5 rows × 36 columns

# maximum Close price for each bank
bank_stocks.xs(key='Close',level='Stock Info',axis=1).max()
Bank Ticker
BAC     54.900002
C      564.099976
GS     247.919998
JPM     70.080002
MS      89.300003
WFC     58.520000
dtype: float64

Returns formula: \(r_t = \frac{p_t - p_{t-1}}{p_{t-1}} = \frac{p_t}{p_{t-1}} - 1\)
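
A quick numeric check (with hypothetical prices) that pct_change implements exactly this formula:

import pandas as pd
p = pd.Series([100.0, 103.0, 101.97])  # hypothetical prices
print(p.pct_change())                  # NaN, 0.03, -0.01
print(p / p.shift(1) - 1)              # same values, computed explicitly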

# compute the returns
returns = bank_stocks.xs(key='Close',level='Stock Info',axis=1).pct_change()
returns.columns = returns.columns+' Return'
# returns = pd.DataFrame()
# for tick in tickers:
#     returns[tick+' Return'] = bank_stocks[tick]['Close'].pct_change()

returns.head()
Bank Ticker BAC Return C Return GS Return JPM Return MS Return WFC Return
Date
2006-01-03 NaN NaN NaN NaN NaN NaN
2006-01-04 -0.010620 -0.018462 -0.013812 -0.014183 0.000686 -0.011599
2006-01-05 0.001288 0.004961 -0.000393 0.003029 0.002742 -0.001110
2006-01-06 -0.001501 0.000000 0.014169 0.007046 0.001025 0.005874
2006-01-09 0.000644 -0.004731 0.012030 0.016242 0.010586 -0.000158
# with this data source Citigroup shows no extreme outliers
returns['C Return'].describe()
count    2516.000000
mean       -0.000157
std         0.038672
min        -0.390244
25%        -0.011182
50%        -0.000210
75%         0.010751
max         0.578249
Name: C Return, dtype: float64
# pairplot
sns.pairplot(returns)
<seaborn.axisgrid.PairGrid at 0x7ff6d75a6bd0>

png

# Worst Drop (4 of them on Inauguration day)
returns.idxmin()
Bank Ticker
BAC Return   2009-01-20
C Return     2009-02-27
GS Return    2009-01-20
JPM Return   2009-01-20
MS Return    2008-10-09
WFC Return   2009-01-20
dtype: datetime64[ns]
# Best single-day gain (for JPM and MS it comes shortly after their worst drop)
returns.idxmax()
Bank Ticker
BAC Return   2009-04-09
C Return     2008-11-24
GS Return    2008-11-24
JPM Return   2009-01-21
MS Return    2008-10-13
WFC Return   2008-07-16
dtype: datetime64[ns]
# standard deviation over the whole period
returns.std()
Bank Ticker
BAC Return    0.036647
C Return      0.038672
GS Return     0.025390
JPM Return    0.027667
MS Return     0.037819
WFC Return    0.030238
dtype: float64
# standard deviation over 2015 (date filter, or apply/map on the index)
# returns[returns.index.map(lambda x: x.year)==2015].std()
returns.loc['2015-01-01':'2015-12-31'].std()
Bank Ticker
BAC Return    0.016163
C Return      0.015289
GS Return     0.014046
JPM Return    0.014017
MS Return     0.016249
WFC Return    0.012591
dtype: float64
# distplot of the 2015 MS returns
plt.figure(figsize=(4,2),dpi=100)
sns.distplot(returns.loc['2015-01-01':'2015-12-31']['MS Return'],bins=100,color='green')
<matplotlib.axes._subplots.AxesSubplot at 0x7ff6cf835e50>

png

# distplot of the 2008 C returns
plt.figure(figsize=(4,2),dpi=100)
sns.distplot(returns.loc['2008-01-01':'2008-12-31']['C Return'],bins=100,color='red')
<matplotlib.axes._subplots.AxesSubplot at 0x7ff6ce6f98d0>

png

More Visualization

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

import plotly
import cufflinks as cf
cf.go_offline()

from IPython.display import Image
# close price for each bank
close = bank_stocks.xs(key='Close',level='Stock Info',axis=1)
close.head()
Bank Ticker BAC C GS JPM MS WFC
Date
2006-01-03 47.080002 492.899994 128.869995 40.189999 58.310001 31.900000
2006-01-04 46.580002 483.799988 127.089996 39.619999 58.349998 31.530001
2006-01-05 46.639999 486.200012 127.040001 39.740002 58.509998 31.495001
2006-01-06 46.570000 486.200012 128.839996 40.020000 58.570000 31.680000
2006-01-09 46.599998 483.899994 130.389999 40.669998 59.189999 31.674999
# line plot of each bank's close price (matplotlib)
close.plot(figsize=(12,4))
# for tick in tickers:
#     bank_stocks[tick]['Close'].plot(figsize=(12,4),label=tick)
# plt.legend()
<matplotlib.axes._subplots.AxesSubplot at 0x7ff6df437790>

png

# line plot of each bank's close price (plotly)
fig = close.iplot(asFigure=True)
fig.show()
# display static image
img_bytes = fig.to_image(format="png")
Image(img_bytes)

png

Moving Averages

# BAC close price and 30-day moving average
fig = plt.figure(figsize=(12,6))
close.loc['2008-01-01':'2008-12-31']['BAC'].rolling(window=30).mean().plot(label='30 Day MA')
close.loc['2008-01-01':'2008-12-31']['BAC'].plot(label='BAC Close')
plt.legend()
plt.tight_layout()

png

# heatmap of close price correlations (reversed colour scale)
plt.figure(figsize=(5,4),dpi=100)
sns.heatmap(close.corr(),cmap='magma_r',linecolor='white',linewidths=1,annot=True)
<matplotlib.axes._subplots.AxesSubplot at 0x7ff6daa16210>

png

# clustermap of close price correlations
sns.clustermap(close.corr(),standard_scale=1,figsize=(6,6),annot=True,cmap='magma_r')
<seaborn.matrix.ClusterGrid at 0x7ff6da90a1d0>

png

Technical Analysis

# candlestick plot for BAC
fig = BAC[['Open', 'High', 'Low', 'Close']].loc['2015-01-01':'2016-01-01'].iplot(kind='candle',asFigure=True,title='Candle plot BAC stock')
fig.show()
# display static image
img_bytes = fig.to_image(format="png")
Image(img_bytes)

png

# Simple Moving Average for MS (run locally; I couldn't export it as an image)
MS['Close'].loc['2015-01-01':'2016-01-01'].ta_plot(study='sma',periods=[13,21,55])
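
As a workaround that does export to a static image, the same simple moving averages can be computed with pandas rolling means and plotted with matplotlib (a minimal sketch reusing the MS dataframe loaded above; the output file name is just an example):

ms_close = MS['Close'].loc['2015-01-01':'2016-01-01']
fig, ax = plt.subplots(figsize=(12, 4))
ms_close.plot(ax=ax, label='MS Close')
for n in [13, 21, 55]:
    # simple moving average over n trading days
    ms_close.rolling(window=n).mean().plot(ax=ax, label='SMA {}'.format(n))
ax.legend()
plt.tight_layout()
plt.savefig('ms_sma.png')  # hypothetical output file name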
# Bollinger Band plot for BAC
BAC['Close'].loc['2015-01-01':'2016-01-01'].ta_plot(study='boll')
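
For reference, the bands can also be built by hand (a sketch using the common 20-day moving average ± 2 rolling standard deviations) and plotted with matplotlib:

bac_close = BAC['Close'].loc['2015-01-01':'2016-01-01']
ma20 = bac_close.rolling(window=20).mean()   # 20-day moving average
sd20 = bac_close.rolling(window=20).std()    # 20-day rolling standard deviation
fig, ax = plt.subplots(figsize=(12, 4))
bac_close.plot(ax=ax, label='BAC Close')
ma20.plot(ax=ax, label='20 Day MA')
(ma20 + 2 * sd20).plot(ax=ax, label='Upper Band')
(ma20 - 2 * sd20).plot(ax=ax, label='Lower Band')
ax.legend()
plt.tight_layout()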