1 | import pandas as pd |
2 | import wikidata_plain_sparql as wikidata |
3 | |
4 | from bokeh.palettes import turbo |
5 | from bokeh.plotting import figure, output_notebook, show, gridplot |
6 | from bokeh.models import ColumnDataSource, HoverTool |
7 | |
8 | # set bokeh output mode to notebook |
9 | output_notebook() |
10 | |
11 | from helper import get_jhu_cached, create_grid, get_bokeh_tools |
1 | # get all dates from earliest available data until yesterday |
2 | all_dates = pd.date_range(start='2020-01-22', end='today')[:-1] |
3 | |
4 | # standardize column names for all entries |
5 | def rename_columns(column): |
6 | column_map = { |
7 | 'Lat': 'Latitude', |
8 | 'Long_': 'Longitude', |
9 | 'Incidence_Rate': 'Incident_Rate' |
10 | } |
11 | if column in column_map: |
12 | return column_map[column] |
13 | return column.replace('/', '_').replace('-', '_').replace(' ', '_') |
14 | |
15 | # load data from all dates |
16 | all_data = [] |
17 | for date in all_dates: |
18 | data = pd.read_csv(get_jhu_cached(date)) |
19 | data.rename(columns=rename_columns, inplace=True) |
20 | data['Date'] = date |
21 | all_data.append(data) |
22 | |
23 | # combine data from all days |
24 | raw_data = pd.concat(all_data) |
1 | # standardize country names for all entries |
2 | country_mapping = { |
3 | 'MS Zaandam|Diamond Princess|Cruise Ship': 'Others', # move cruise ships to others |
4 | 'Hong Kong.+': 'Hong Kong', |
5 | 'Iran.+': 'Iran', |
6 | '.*Congo.*': 'Congo', |
7 | 'Mainland China': 'China', |
8 | '.*Bahamas.*': 'The Bahamas', |
9 | '.*Gambia.*': 'The Gambia', |
10 | 'Viet Nam': 'Vietnam', |
11 | 'Taiwan\*': 'Taiwan', |
12 | 'Cote d\'Ivoire': 'Ivory Coast', |
13 | 'Cabo Verde': 'Cape Verde', |
14 | 'Russian Federation': 'Russia', |
15 | ' Azerbaijan': 'Azerbaijan', |
16 | 'Holy See': 'Vatican City', |
17 | 'Republic of Ireland': 'Ireland', |
18 | 'Republic of Moldova': 'Moldova', |
19 | 'Czechia': 'Czech Republic', |
20 | 'Republic of Korea|Korea, South': 'South Korea', |
21 | 'Timor-Leste': 'East Timor', |
22 | 'Macao SAR|Macau': 'Macao', |
23 | 'UK': 'United Kingdom', |
24 | 'Jersey|Guernsey': 'Channel Islands', |
25 | 'Dominican Republicn Republic|Dominica': 'Dominican Republic' |
26 | } |
27 | |
28 | raw_data['Country_Region'].replace(to_replace=country_mapping.keys(), value=country_mapping.values(), regex=True, inplace=True) |
1 | # group data by country |
2 | daily_updates = raw_data.groupby(['Country_Region', 'Date']).agg( |
3 | Confirmed = ('Confirmed','sum'), |
4 | Deaths = ('Deaths','sum'), |
5 | ).reset_index() |
6 | |
7 | # get changes in data |
8 | updates_per_country = daily_updates.groupby('Country_Region') |
9 | daily_updates['New_Confirmed'] = updates_per_country['Confirmed'].diff().fillna(0) |
10 | daily_updates['New_Deaths'] = updates_per_country['Deaths'].diff().fillna(0) |
11 | |
12 | worldwide_pretty = daily_updates.loc[:, ['Date', 'Country_Region', 'New_Confirmed', 'Confirmed', 'New_Deaths', 'Deaths']] |
1 | # get all countries incl. colors |
2 | all_countries = daily_updates['Country_Region'].sort_values().unique() |
3 | colors = turbo(all_countries.size) |
1 | # show new cases |
2 | tools = get_bokeh_tools(tooltips=[ |
3 | ('Country', '@country'), |
4 | ('Date', '@x{%F}'), |
5 | ('New Cases', '@y{0,0}'), |
6 | ]) |
7 | |
8 | new_cases_graph = figure( |
9 | title='New COVID-19 cases per country', |
10 | y_axis_label='new cases', |
11 | x_axis_type='datetime', |
12 | sizing_mode='stretch_width', |
13 | tools=tools) |
14 | |
15 | i = 0 |
16 | for country in all_countries: |
17 | updates_for_country = daily_updates.loc[daily_updates['Country_Region'] == country] |
18 | data = ColumnDataSource(data={ |
19 | 'x': updates_for_country['Date'], |
20 | 'y': updates_for_country['New_Confirmed'], |
21 | 'country': updates_for_country['Country_Region'] |
22 | }) |
23 | new_cases_graph.line('x', 'y', source=data, line_color=colors[i], line_width=2) |
24 | i += 1 |
25 | show(new_cases_graph) |
1 | # show total cases |
2 | tools = get_bokeh_tools(tooltips=[ |
3 | ('Country', '@country'), |
4 | ('Date', '@x{%F}'), |
5 | ('Total Cases', '@y{0,0}'), |
6 | ]) |
7 | |
8 | total_cases_graph = figure( |
9 | title='Total COVID-19 cases per country', |
10 | y_axis_label='total cases', |
11 | x_axis_type='datetime', |
12 | sizing_mode='stretch_width', |
13 | tools=tools) |
14 | |
15 | i = 0 |
16 | for country in all_countries: |
17 | updates_for_country = daily_updates.loc[daily_updates['Country_Region'] == country] |
18 | data = ColumnDataSource(data={ |
19 | 'x': updates_for_country['Date'], |
20 | 'y': updates_for_country['Confirmed'], |
21 | 'country': updates_for_country['Country_Region'] |
22 | }) |
23 | total_cases_graph.line('x', 'y', source=data, line_color=colors[i], line_width=2) |
24 | i += 1 |
25 | show(total_cases_graph) |
1 | # get population data from WikiData |
2 | canton_data = wikidata.query(''' |
3 | SELECT ?shortCode ?population ?canton WHERE { |
4 | ?canton wdt:P31 wd:Q23058. |
5 | ?canton wdt:P300 ?shortCode. |
6 | OPTIONAL { |
7 | ?canton p:P1082 ?population_stmt. |
8 | ?population_stmt ps:P1082 ?population. |
9 | ?population_stmt pq:P585 ?population_date. |
10 | } |
11 | FILTER NOT EXISTS { |
12 | ?canton p:P1082/pq:P585 ?population_date_. |
13 | FILTER (?population_date_ > ?population_date) |
14 | } |
15 | } |
16 | ORDER BY ?shortCode |
17 | ''') |
18 | canton_data.set_index('shortCode', inplace=True) |
1 | raw_data = pd.read_csv('https://raw.githubusercontent.com/openZH/covid_19/master/COVID19_Fallzahlen_CH_total_v2.csv') |
2 | |
3 | # convert to date |
4 | raw_data['date'] = pd.to_datetime(raw_data['date']) |
5 | |
6 | # remove FL |
7 | swiss = raw_data[raw_data['abbreviation_canton_and_fl'] != 'FL'] |
8 | |
9 | # only use data after 1st of june |
10 | swiss = swiss[swiss['date'] >= '2020-06-01'] |
11 | |
12 | # only keep useful entries |
13 | conf_cases = swiss.loc[-swiss['ncumul_conf'].isna()].copy() |
14 | |
15 | # get all cantons |
16 | all_cantons = conf_cases['abbreviation_canton_and_fl'].unique() |
17 | all_cantons.sort() |
18 | |
19 | # calculate new cases/deaths |
20 | conf_cases_per_canton = conf_cases.groupby('abbreviation_canton_and_fl') |
21 | conf_cases['new_cases'] = conf_cases_per_canton['ncumul_conf'].diff().fillna(0) |
22 | conf_cases['new_deaths'] = conf_cases_per_canton['ncumul_deceased'].diff().fillna(0) |
23 | |
24 | # calculate total cases/deaths |
25 | conf_cases['total_cases'] = conf_cases_per_canton['new_cases'].cumsum() |
26 | conf_cases['total_deaths'] = conf_cases_per_canton['new_deaths'].cumsum() |
27 | |
28 | # calculate values per 100'000 residents |
29 | def relative_values(group): |
30 | canton = group.name |
31 | return group.div(int(canton_data.at['CH-' + canton, 'population']) / 100000) |
32 | |
33 | conf_cases['new_cases_relative'] = conf_cases_per_canton['new_cases'].apply(relative_values) |
34 | conf_cases['total_cases_relative'] = conf_cases_per_canton['total_cases'].apply(relative_values) |
35 | |
36 | swiss_pretty = conf_cases.loc[:, ['date', 'abbreviation_canton_and_fl', 'new_cases', 'total_cases', 'new_deaths', 'total_deaths']] |
1 | # calculate new cases |
2 | graphs = [] |
3 | max_new_cases = conf_cases['new_cases_relative'].max() |
4 | for canton in all_cantons: |
5 | update_for_canton = conf_cases.loc[conf_cases['abbreviation_canton_and_fl'] == canton].copy() |
6 | |
7 | update_for_canton['new_cases_relative_avg'] = update_for_canton['new_cases_relative'].rolling(window=7).mean() |
8 | |
9 | new_cases_graph = figure(title=canton, y_axis_label='new cases', y_range=[0, max_new_cases], x_axis_type='datetime') |
10 | new_cases_graph.line(update_for_canton['date'], update_for_canton['new_cases_relative'], line_width=1) |
11 | new_cases_graph.line(update_for_canton['date'], update_for_canton['new_cases_relative_avg'], line_color='red', line_width=1) |
12 | graphs.append(new_cases_graph) |
13 | |
14 | show(create_grid(graphs, sizing_mode='scale_width')) |
1 | # calculate total cases |
2 | graphs = [] |
3 | max_total_cases = conf_cases['total_cases_relative'].max() |
4 | for canton in all_cantons: |
5 | update_for_canton = conf_cases.loc[conf_cases['abbreviation_canton_and_fl'] == canton] |
6 | |
7 | total_cases_graph = figure(title=canton, y_axis_label='total cases', y_range=[0, max_total_cases], x_axis_type='datetime') |
8 | total_cases_graph.line(update_for_canton['date'], update_for_canton['total_cases_relative'], line_width=1) |
9 | graphs.append(total_cases_graph) |
10 | |
11 | show(create_grid(graphs, sizing_mode='scale_width')) |
1 | worldwide_pretty |
Date | Country_Region | New_Confirmed | Confirmed | New_Deaths | Deaths | |
---|---|---|---|---|---|---|
0 | 2020-02-24 | Afghanistan | 0.0 | 5.0 | 0.0 | 0.0 |
1 | 2020-02-25 | Afghanistan | 0.0 | 5.0 | 0.0 | 0.0 |
2 | 2020-02-26 | Afghanistan | 0.0 | 5.0 | 0.0 | 0.0 |
3 | 2020-02-27 | Afghanistan | 0.0 | 5.0 | 0.0 | 0.0 |
4 | 2020-02-28 | Afghanistan | 0.0 | 5.0 | 0.0 | 0.0 |
... | ... | ... | ... | ... | ... | ... |
119753 | 2020-03-12 | occupied Palestinian territory | 0.0 | 0.0 | 0.0 | 0.0 |
119754 | 2020-03-14 | occupied Palestinian territory | 0.0 | 0.0 | 0.0 | 0.0 |
119755 | 2020-03-15 | occupied Palestinian territory | 0.0 | 0.0 | 0.0 | 0.0 |
119756 | 2020-03-16 | occupied Palestinian territory | 0.0 | 0.0 | 0.0 | 0.0 |
119757 | 2020-03-17 | occupied Palestinian territory | 0.0 | 0.0 | 0.0 | 0.0 |
119758 rows × 6 columns
1 | swiss_pretty |
date | abbreviation_canton_and_fl | new_cases | total_cases | new_deaths | total_deaths | |
---|---|---|---|---|---|---|
2371 | 2020-06-01 | BL | 0.0 | 0.0 | 0.0 | 0.0 |
2372 | 2020-06-01 | FR | 0.0 | 0.0 | 0.0 | 0.0 |
2373 | 2020-06-01 | GR | 0.0 | 0.0 | 0.0 | 0.0 |
2374 | 2020-06-01 | JU | 0.0 | 0.0 | 0.0 | 0.0 |
2375 | 2020-06-01 | NE | 0.0 | 0.0 | 0.0 | 0.0 |
... | ... | ... | ... | ... | ... | ... |
15584 | 2021-11-15 | TG | 1.0 | 28303.0 | 0.0 | 486.0 |
15585 | 2021-11-15 | UR | 80.0 | 4345.0 | 0.0 | 47.0 |
15586 | 2021-11-15 | BS | 51.0 | 16672.0 | 0.0 | 173.0 |
15587 | 2021-11-15 | AI | 44.0 | 2115.0 | 1.0 | 19.0 |
15588 | 2021-11-15 | ZH | 410.0 | 148324.0 | 0.0 | 1378.0 |
12404 rows × 6 columns
1 |