Python: Plotly and Choropleth

Utilizzo l’environment conda py3

1
~$ conda activate py3

Versione modulo installato

1
2
3
4
5
6
7
8
9
10
11
~$ pip show chart-studio
Name: chart-studio
Version: 1.1.0
Summary: Utilities for interfacing with plotly's Chart Studio
Home-page: https://plot.ly/python/
Author: Chris P
Author-email: chris@plot.ly
License: MIT
Location: /home/user/miniconda3/envs/py3/lib/python3.7/site-packages
Requires: plotly, requests, retrying, six
Required-by:

Plotly and Choropleth

Per ottenere i grafici interattivi lanciare il codice in locale.
Sono riuscito ad esportarli via web con Markdown solo statici (sono necessari psutil e orca)

1
2
~$ conda install psutil
~$ conda install -c plotly plotly-orca

Cheat sheet Plotly
Documentazione Choropleth

1
2
3
4
5
import chart_studio.plotly as py
import plotly.graph_objs as go
import pandas as pd
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from IPython.display import Image
1
2
# Needed so that figures are shown inside the notebook.
init_notebook_mode(connected=True)

Plot USA-states

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# One choropleth trace: three US states, each coloured by a toy value in `z`.
data = {
    'type': 'choropleth',
    'locations': ['AZ', 'CA', 'NY'],
    'locationmode': 'USA-states',
    'colorscale': 'Portland',
    'text': ['Arizona', 'Cali', 'New York'],
    'z': [1.0, 2.0, 3.0],
    'marker': {'line': {'color': 'rgb(0,0,0)', 'width': 3}},
    'colorbar': {'title': 'Colorbar Title'},
}

layout = {'title': 'Plot 1'}

choromap = go.Figure(data=[data], layout=layout)

# Geo styling, one option per line. Alternatives left disabled by the author:
#   projection_type="orthographic"  (or "natural earth")
#   lataxis_showgrid=True, lonaxis_showgrid=True
choromap.update_geos(
    visible=True,
    resolution=50,
    scope="usa",
    showcoastlines=True,
    coastlinecolor="RebeccaPurple",
    showland=True,
    landcolor="LightGreen",
    showocean=True,
    oceancolor="LightBlue",
    showlakes=True,
    lakecolor="Blue",
    showrivers=True,
    rivercolor="Blue",
    showcountries=True,
    countrycolor="Black",
    showsubunits=True,
    subunitcolor="Black",
)
# choromap.show()  # can be omitted when the previous command is used
# iplot(choromap)  # can be omitted when the previous command is used
# plot(choromap)   # opens the figure in a new window
1
2
# export map
# choromap.write_image("img1.png")
1
2
3
4
# Display a static image: render the figure to PNG bytes, then show them inline.
img_bytes = choromap.to_image(format="png") # optional parameters: width, height, scale
# img_bytes[:20]
Image(img_bytes)

png

Italy - Regioni

Confini Italiani fonte Openpolis

1
2
3
import pandas as pd
import requests
import plotly.express as px
1
2
3
# Load the regional boundaries (GeoJSON).
geojson_url = 'https://raw.githubusercontent.com/AlbGri/AlbGri.github.io/master/assets/files/Openpolis/geojson/limits_IT_regions.geojson'
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() reports an HTTP error directly instead of letting
# .json() fail on an error page.
geo_response = requests.get(geojson_url, timeout=30)
geo_response.raise_for_status()
italy_regions_geo = geo_response.json()
1
2
3
4
5
6
7
8
9
10
# Region names; they are used further down as the lookup key into the GeoJSON.
regions = [
    'Piemonte',
    'Trentino-Alto Adige/Südtirol',
    'Lombardia',
    'Puglia',
    'Basilicata',
    'Friuli-Venezia Giulia',
    'Liguria',
    "Valle d'Aosta/Vallée d'Aoste",
    'Emilia-Romagna',
    'Molise',
    'Lazio',
    'Veneto',
    'Sardegna',
    'Sicilia',
    'Abruzzo',
    'Calabria',
    'Toscana',
    'Umbria',
    'Campania',
    'Marche',
]
# One row per region, held in the 'reg_name' column.
df = pd.DataFrame({'reg_name': regions})
# Demo metric to colour the map with: number of characters in each name.
df['name_length'] = df['reg_name'].map(len)
df.head()
reg_name name_length
0 Piemonte 8
1 Trentino-Alto Adige/Südtirol 28
2 Lombardia 9
3 Puglia 6
4 Basilicata 10
1
2
3
4
5
6
7
8
9
10
11
12
# Regional choropleth coloured by the length of each region's name.
fig = px.choropleth(
    df,
    geojson=italy_regions_geo,            # the geojson_url string can be passed here as well
    locations='reg_name',                 # dataframe column holding the location ids
    featureidkey='properties.reg_name',   # GeoJSON feature field matched against `locations`
    color='name_length',
    color_continuous_scale="Portland",
    scope="europe",
)
# Hide the base map layers and fit the view to the plotted locations.
fig.update_geos(
    showcountries=False,
    showcoastlines=False,
    showland=False,
    fitbounds="locations",
)
# Remove the outer margins on all four sides.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
# fig.show()
1
2
3
# Display a static image: render the figure to PNG bytes, then show them inline.
img_bytes = fig.to_image(format="png")
Image(img_bytes)

png

Italy - Province

Confini Italiani fonte Openpolis
Dati COVID-19 fonte Presidenza del Consiglio dei Ministri - Dipartimento della Protezione Civile

1
2
3
4
# from urllib.request import urlopen
import pandas as pd
import plotly.express as px
# import json
1
2
3
4
5
6
# Load the provincial boundaries (GeoJSON).
geojson_url = 'https://raw.githubusercontent.com/AlbGri/AlbGri.github.io/master/assets/files/Openpolis/geojson/limits_IT_provinces.geojson'
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() reports an HTTP error directly instead of letting
# .json() fail on an error page.
geo_response = requests.get(geojson_url, timeout=30)
geo_response.raise_for_status()
italy_province_geo = geo_response.json()

# Structure of `properties` for the first feature only (printing the whole
# dictionary is heavy).
italy_province_geo['features'][0]['properties']
1
2
3
4
5
6
7
{'prov_name': 'Torino',
 'prov_istat_code_num': 1,
 'prov_acr': 'TO',
 'reg_name': 'Piemonte',
 'reg_istat_code': '01',
 'reg_istat_code_num': 1,
 'prov_istat_code': '001'}
1
2
3
4
# Load the COVID-19 per-province CSV into a DataFrame and preview the first rows.
covid_url = 'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-province/dpc-covid19-ita-province.csv'
df = pd.read_csv(covid_url)
df.head()
data stato codice_regione denominazione_regione codice_provincia denominazione_provincia sigla_provincia lat long totale_casi note
0 2020-02-24T18:00:00 ITA 13 Abruzzo 66 L'Aquila AQ 42.351222 13.398438 0 NaN
1 2020-02-24T18:00:00 ITA 13 Abruzzo 67 Teramo TE 42.658918 13.704400 0 NaN
2 2020-02-24T18:00:00 ITA 13 Abruzzo 68 Pescara PE 42.464584 14.213648 0 NaN
3 2020-02-24T18:00:00 ITA 13 Abruzzo 69 Chieti CH 42.351032 14.167546 0 NaN
4 2020-02-24T18:00:00 ITA 13 Abruzzo 979 In fase di definizione/aggiornamento NaN NaN NaN 0 NaN
1
# Summarise the frame: column dtypes and non-null counts.
df.info()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18149 entries, 0 to 18148
Data columns (total 11 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   data                     18149 non-null  object 
 1   stato                    18149 non-null  object 
 2   codice_regione           18149 non-null  int64  
 3   denominazione_regione    18149 non-null  object 
 4   codice_provincia         18149 non-null  int64  
 5   denominazione_provincia  18149 non-null  object 
 6   sigla_provincia          14734 non-null  object 
 7   lat                      14873 non-null  float64
 8   long                     14873 non-null  float64
 9   totale_casi              18149 non-null  int64  
 10  note                     17 non-null     object 
dtypes: float64(2), int64(3), object(6)
memory usage: 1.5+ MB
1
2
# List the distinct province codes above 111. Per the author's note these are
# the temporary / outside-Italy entries (e.g. 979, "In fase di
# definizione/aggiornamento"), not real provinces.
# NOTE(review): the 111 cut-off is taken from the original code; confirm it.
df[df['codice_provincia'] > 111]['codice_provincia'].unique()
1
2
3
4
array([979, 980, 982, 983, 984, 985, 986, 987, 988, 989, 990, 981, 996,
       991, 992, 993, 994, 995, 997, 998, 999, 879, 880, 882, 883, 884,
       885, 886, 887, 888, 889, 890, 881, 896, 891, 892, 893, 894, 895,
       897, 898, 899])
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Keep only the columns of interest.
df = df[['data', 'codice_provincia','sigla_provincia', 'denominazione_provincia', 'totale_casi', 'note']]

# Keep province codes up to 111, dropping the temporary / outside-Italy
# entries. The explicit .copy() makes the result an independent frame, so
# adding a column below does not trigger SettingWithCopyWarning.
df = df[df['codice_provincia'] <= 111].copy()

# The raw timestamp column becomes 'time'; 'data' is rebuilt below as a day label.
df = df.rename(columns={'data': 'time'})

# 'time' holds full timestamps such as '2020-02-24T18:00:00', which a
# date-only format does not match as a whole string. Parse only the leading
# 'YYYY-MM-DD' part, which validates the date, and store it back as text.
df['data'] = pd.to_datetime(df['time'].str[:10], format='%Y-%m-%d').dt.strftime('%Y-%m-%d')

# Preview the result.
df.head()
time codice_provincia sigla_provincia denominazione_provincia totale_casi note data
0 2020-02-24T18:00:00 66 AQ L'Aquila 0 NaN 2020-02-24
1 2020-02-24T18:00:00 67 TE Teramo 0 NaN 2020-02-24
2 2020-02-24T18:00:00 68 PE Pescara 0 NaN 2020-02-24
3 2020-02-24T18:00:00 69 CH Chieti 0 NaN 2020-02-24
5 2020-02-24T18:00:00 76 PZ Potenza 0 NaN 2020-02-24
1
2
3
4
5
6
7
8
9
10
11
12
# Animated choropleth of `totale_casi` per Italian province, with one
# animation frame per distinct value of the 'data' column.
fig = px.choropleth(df,
                    geojson=geojson_url,
                    locations='codice_provincia',
                    color='totale_casi',
                    color_continuous_scale='Reds',
                    # GeoJSON feature field matched against `locations`
                    featureidkey='properties.prov_istat_code_num',
                    animation_frame='data',
                    # Same colour range for every frame: 0 to the overall maximum.
                    # Series.max() is vectorised; int() keeps a plain Python number.
                    range_color=(0, int(df['totale_casi'].max())))
# Hide the base map layers and fit the view to the plotted locations.
fig.update_geos(showcountries=False, showcoastlines=False, showland=False, fitbounds="locations")
# Remove the outer margins on all four sides.
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
# fig.write_html('2020-07-11-covid-19-italy.html')
1
2
3
# Display a static image: render the figure to PNG bytes, then show them inline.
img_bytes = fig.to_image(format="png")
Image(img_bytes)

png

Real Data US Map Choropleth

1
2
# Load the 2011 US agriculture exports table and preview the first rows.
# NOTE(review): the path has no file extension; confirm the name on disk.
df = pd.read_csv('2011_US_AGRI_Exports')
df.head()
code state category total exports beef pork poultry dairy fruits fresh fruits proc total fruits veggies fresh veggies proc total veggies corn wheat cotton text
0 AL Alabama state 1390.63 34.4 10.6 481.0 4.06 8.0 17.1 25.11 5.5 8.9 14.33 34.9 70.0 317.61 Alabama<br>Beef 34.4 Dairy 4.06<br>Fruits 25.1...
1 AK Alaska state 13.31 0.2 0.1 0.0 0.19 0.0 0.0 0.00 0.6 1.0 1.56 0.0 0.0 0.00 Alaska<br>Beef 0.2 Dairy 0.19<br>Fruits 0.0 Ve...
2 AZ Arizona state 1463.17 71.3 17.9 0.0 105.48 19.3 41.0 60.27 147.5 239.4 386.91 7.3 48.7 423.95 Arizona<br>Beef 71.3 Dairy 105.48<br>Fruits 60...
3 AR Arkansas state 3586.02 53.2 29.4 562.9 3.53 2.2 4.7 6.88 4.4 7.1 11.45 69.5 114.5 665.44 Arkansas<br>Beef 53.2 Dairy 3.53<br>Fruits 6.8...
4 CA California state 16472.88 228.7 11.1 225.4 929.95 2791.8 5944.6 8736.40 803.2 1303.5 2106.79 34.6 249.3 1064.95 California<br>Beef 228.7 Dairy 929.95<br>Frui...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Choropleth trace: one US state per row of `df`, coloured by total exports.
data = dict(type='choropleth',
            colorscale = 'ylorbr',
            locations = df['code'],           # state abbreviations
            z = df['total exports'],
            locationmode = 'USA-states',
            text = df['text'],                # per-state text column from the CSV
            marker = dict(line = dict(color = 'rgb(12,12,12)',width = 2)),
            colorbar = {'title':"Millions USD"}
            )

layout = dict(title = '2011 US Agriculture Exports by State',
              geo = dict(showlakes = True,
                         lakecolor = 'rgb(85,173,240)')
             )

# Build the figure once (the original repeated this statement twice).
choromap = go.Figure(data = [data],layout = layout)
# Restrict the map to the USA and draw country and state borders in black.
choromap.update_geos(
    visible=True,
    resolution=50,
    scope='usa',
    showcountries=True, countrycolor="Black",
    showsubunits=True, subunitcolor="Black")
1
2
3
# Display a static image: render the figure to PNG bytes, then show them inline.
img_bytes = choromap.to_image(format="png")
Image(img_bytes)

png

World GDP

1
2
# Load the 2014 world GDP table and preview the first rows.
# NOTE(review): the path has no file extension; confirm the name on disk.
df = pd.read_csv('2014_World_GDP')
df.head()
COUNTRY GDP (BILLIONS) CODE
0 Afghanistan 21.71 AFG
1 Albania 13.40 ALB
2 Algeria 227.80 DZA
3 American Samoa 0.75 ASM
4 Andorra 4.80 AND
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Choropleth trace: one country per row of `df`, coloured by GDP.
data = {
    'type': 'choropleth',
    'colorscale': 'ylorbr',
    'locations': df['CODE'],
    'z': df['GDP (BILLIONS)'],
    'text': df['COUNTRY'],
    'colorbar': {'title': 'GDP Billions US'},
}

# World map on a Mercator projection, without the surrounding frame.
layout = {
    'title': '2014 Global GDP',
    'geo': {
        'showframe': False,
        'projection': {'type': 'mercator'},
    },
}

choromap = go.Figure(data=[data], layout=layout)
iplot(choromap)
1
2
3
# Display a static image: render the figure to PNG bytes, then show them inline.
img_bytes = choromap.to_image(format="png")
Image(img_bytes)

png

World Power Consumption

1
2
# Load the 2014 world power consumption table and preview the first rows.
# NOTE(review): the path has no file extension; confirm the name on disk.
df = pd.read_csv('2014_World_Power_Consumption')
df.head()
Country Power Consumption KWH Text
0 China 5.523000e+12 China 5,523,000,000,000
1 United States 3.832000e+12 United 3,832,000,000,000
2 European 2.771000e+12 European 2,771,000,000,000
3 Russia 1.065000e+12 Russia 1,065,000,000,000
4 Japan 9.210000e+11 Japan 921,000,000,000
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Choropleth trace: one country per row of `df`, coloured by power consumption.
data = {
    'type': 'choropleth',
    'colorscale': 'Viridis',
    'reversescale': True,
    'locations': df['Country'],
    # Rows are matched to the map by country name.
    'locationmode': 'country names',
    'z': df['Power Consumption KWH'],
    'text': df['Country'],
    'colorbar': {'title': 'Power Consumption KWH'},
}

# World map on a Mercator projection, without the surrounding frame.
layout = {
    'title': '2014 Power Consumption KWH',
    'geo': {
        'showframe': False,
        'projection': {'type': 'mercator'},
    },
}

choromap = go.Figure(data=[data], layout=layout)
iplot(choromap, validate=False)
1
2
3
# Display a static image: render the figure to PNG bytes, then show them inline.
img_bytes = choromap.to_image(format="png")
Image(img_bytes)

png

USA Choropleth

1
2
# Load the 2012 election table and preview the first rows.
# NOTE(review): the path has no file extension; confirm the name on disk.
df = pd.read_csv('2012_Election_Data')
df.head()
Year ICPSR State Code Alphanumeric State Code State VEP Total Ballots Counted VEP Highest Office VAP Highest Office Total Ballots Counted Highest Office Voting-Eligible Population (VEP) Voting-Age Population (VAP) % Non-citizen Prison Probation Parole Total Ineligible Felon State Abv
0 2012 41 1 Alabama NaN 58.6% 56.0% NaN 2,074,338 3,539,217 3707440.0 2.6% 32,232 57,993 8,616 71,584 AL
1 2012 81 2 Alaska 58.9% 58.7% 55.3% 301,694 300,495 511,792 543763.0 3.8% 5,633 7,173 1,882 11,317 AK
2 2012 61 3 Arizona 53.0% 52.6% 46.5% 2,323,579 2,306,559 4,387,900 4959270.0 9.9% 35,188 72,452 7,460 81,048 AZ
3 2012 42 4 Arkansas 51.1% 50.7% 47.7% 1,078,548 1,069,468 2,109,847 2242740.0 3.5% 14,471 30,122 23,372 53,808 AR
4 2012 71 5 California 55.7% 55.1% 45.1% 13,202,158 13,038,547 23,681,837 28913129.0 17.4% 119,455 0 89,287 208,742 CA
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Choropleth trace: one US state per row of `df`, coloured by voting-age population.
data = {
    'type': 'choropleth',
    'colorscale': 'Viridis',
    'reversescale': True,
    'locations': df['State Abv'],
    'locationmode': 'USA-states',
    'z': df['Voting-Age Population (VAP)'],
    'text': df['State'],
    # Thin white outline around each state.
    'marker': {'line': {'color': 'rgb(255,255,255)', 'width': 1}},
    'colorbar': {'title': 'Voting-Age Population (VAP)'},
}

# Restrict the map to the USA and paint the lakes.
layout = {
    'title': '2012 Voting-Age Population (VAP)',
    'geo': {
        'scope': "usa",
        'showlakes': True,
        'lakecolor': 'rgb(85,173,240)',
    },
}

choromap = go.Figure(data=[data], layout=layout)
iplot(choromap, validate=False)
1
2
3
# Display a static image: render the figure to PNG bytes, then show them inline.
img_bytes = choromap.to_image(format="png")
Image(img_bytes)

png