| 1 | import pandas as pd |
| 2 | import wikidata_plain_sparql as wikidata |
| 3 | |
| 4 | from bokeh.palettes import turbo |
| 5 | from bokeh.plotting import figure, output_notebook, show, gridplot |
| 6 | from bokeh.models import ColumnDataSource, HoverTool |
| 7 | |
| 8 | # set bokeh output mode to notebook |
| 9 | output_notebook() |
| 10 | |
| 11 | from helper import get_jhu_cached, create_grid, get_bokeh_tools |
| 1 | # get all dates from earliest available data until yesterday |
| 2 | all_dates = pd.date_range(start='2020-01-22', end='today')[:-1] |
| 3 | |
| 4 | # standardize column names for all entries |
| 5 | def rename_columns(column): |
| 6 | column_map = { |
| 7 | 'Lat': 'Latitude', |
| 8 | 'Long_': 'Longitude', |
| 9 | 'Incidence_Rate': 'Incident_Rate' |
| 10 | } |
| 11 | if column in column_map: |
| 12 | return column_map[column] |
| 13 | return column.replace('/', '_').replace('-', '_').replace(' ', '_') |
| 14 | |
| 15 | # load data from all dates |
| 16 | all_data = [] |
| 17 | for date in all_dates: |
| 18 | data = pd.read_csv(get_jhu_cached(date)) |
| 19 | data.rename(columns=rename_columns, inplace=True) |
| 20 | data['Date'] = date |
| 21 | all_data.append(data) |
| 22 | |
| 23 | # combine data from all days |
| 24 | raw_data = pd.concat(all_data) |
| 1 | # standardize country names for all entries |
| 2 | country_mapping = { |
| 3 | 'MS Zaandam|Diamond Princess|Cruise Ship': 'Others', # move cruise ships to others |
| 4 | 'Hong Kong.+': 'Hong Kong', |
| 5 | 'Iran.+': 'Iran', |
| 6 | '.*Congo.*': 'Congo', |
| 7 | 'Mainland China': 'China', |
| 8 | '.*Bahamas.*': 'The Bahamas', |
| 9 | '.*Gambia.*': 'The Gambia', |
| 10 | 'Viet Nam': 'Vietnam', |
| 11 | 'Taiwan\*': 'Taiwan', |
| 12 | 'Cote d\'Ivoire': 'Ivory Coast', |
| 13 | 'Cabo Verde': 'Cape Verde', |
| 14 | 'Russian Federation': 'Russia', |
| 15 | ' Azerbaijan': 'Azerbaijan', |
| 16 | 'Holy See': 'Vatican City', |
| 17 | 'Republic of Ireland': 'Ireland', |
| 18 | 'Republic of Moldova': 'Moldova', |
| 19 | 'Czechia': 'Czech Republic', |
| 20 | 'Republic of Korea|Korea, South': 'South Korea', |
| 21 | 'Timor-Leste': 'East Timor', |
| 22 | 'Macao SAR|Macau': 'Macao', |
| 23 | 'UK': 'United Kingdom', |
| 24 | 'Jersey|Guernsey': 'Channel Islands', |
| 25 | 'Dominican Republicn Republic|Dominica': 'Dominican Republic' |
| 26 | } |
| 27 | |
| 28 | raw_data['Country_Region'].replace(to_replace=country_mapping.keys(), value=country_mapping.values(), regex=True, inplace=True) |
| 1 | # group data by country |
| 2 | daily_updates = raw_data.groupby(['Country_Region', 'Date']).agg( |
| 3 | Confirmed = ('Confirmed','sum'), |
| 4 | Deaths = ('Deaths','sum'), |
| 5 | ).reset_index() |
| 6 | |
| 7 | # get changes in data |
| 8 | updates_per_country = daily_updates.groupby('Country_Region') |
| 9 | daily_updates['New_Confirmed'] = updates_per_country['Confirmed'].diff().fillna(0) |
| 10 | daily_updates['New_Deaths'] = updates_per_country['Deaths'].diff().fillna(0) |
| 11 | |
| 12 | worldwide_pretty = daily_updates.loc[:, ['Date', 'Country_Region', 'New_Confirmed', 'Confirmed', 'New_Deaths', 'Deaths']] |
| 1 | # get all countries incl. colors |
| 2 | all_countries = daily_updates['Country_Region'].sort_values().unique() |
| 3 | colors = turbo(all_countries.size) |
| 1 | # show new cases |
| 2 | tools = get_bokeh_tools(tooltips=[ |
| 3 | ('Country', '@country'), |
| 4 | ('Date', '@x{%F}'), |
| 5 | ('New Cases', '@y{0,0}'), |
| 6 | ]) |
| 7 | |
| 8 | new_cases_graph = figure( |
| 9 | title='New COVID-19 cases per country', |
| 10 | y_axis_label='new cases', |
| 11 | x_axis_type='datetime', |
| 12 | sizing_mode='stretch_width', |
| 13 | tools=tools) |
| 14 | |
| 15 | i = 0 |
| 16 | for country in all_countries: |
| 17 | updates_for_country = daily_updates.loc[daily_updates['Country_Region'] == country] |
| 18 | data = ColumnDataSource(data={ |
| 19 | 'x': updates_for_country['Date'], |
| 20 | 'y': updates_for_country['New_Confirmed'], |
| 21 | 'country': updates_for_country['Country_Region'] |
| 22 | }) |
| 23 | new_cases_graph.line('x', 'y', source=data, line_color=colors[i], line_width=2) |
| 24 | i += 1 |
| 25 | show(new_cases_graph) |
| 1 | # show total cases |
| 2 | tools = get_bokeh_tools(tooltips=[ |
| 3 | ('Country', '@country'), |
| 4 | ('Date', '@x{%F}'), |
| 5 | ('Total Cases', '@y{0,0}'), |
| 6 | ]) |
| 7 | |
| 8 | total_cases_graph = figure( |
| 9 | title='Total COVID-19 cases per country', |
| 10 | y_axis_label='total cases', |
| 11 | x_axis_type='datetime', |
| 12 | sizing_mode='stretch_width', |
| 13 | tools=tools) |
| 14 | |
| 15 | i = 0 |
| 16 | for country in all_countries: |
| 17 | updates_for_country = daily_updates.loc[daily_updates['Country_Region'] == country] |
| 18 | data = ColumnDataSource(data={ |
| 19 | 'x': updates_for_country['Date'], |
| 20 | 'y': updates_for_country['Confirmed'], |
| 21 | 'country': updates_for_country['Country_Region'] |
| 22 | }) |
| 23 | total_cases_graph.line('x', 'y', source=data, line_color=colors[i], line_width=2) |
| 24 | i += 1 |
| 25 | show(total_cases_graph) |
| 1 | # get population data from WikiData |
| 2 | canton_data = wikidata.query(''' |
| 3 | SELECT ?shortCode ?population ?canton WHERE { |
| 4 | ?canton wdt:P31 wd:Q23058. |
| 5 | ?canton wdt:P300 ?shortCode. |
| 6 | OPTIONAL { |
| 7 | ?canton p:P1082 ?population_stmt. |
| 8 | ?population_stmt ps:P1082 ?population. |
| 9 | ?population_stmt pq:P585 ?population_date. |
| 10 | } |
| 11 | FILTER NOT EXISTS { |
| 12 | ?canton p:P1082/pq:P585 ?population_date_. |
| 13 | FILTER (?population_date_ > ?population_date) |
| 14 | } |
| 15 | } |
| 16 | ORDER BY ?shortCode |
| 17 | ''') |
| 18 | canton_data.set_index('shortCode', inplace=True) |
| 1 | raw_data = pd.read_csv('https://raw.githubusercontent.com/openZH/covid_19/master/COVID19_Fallzahlen_CH_total_v2.csv') |
| 2 | |
| 3 | # convert to date |
| 4 | raw_data['date'] = pd.to_datetime(raw_data['date']) |
| 5 | |
| 6 | # remove FL |
| 7 | swiss = raw_data[raw_data['abbreviation_canton_and_fl'] != 'FL'] |
| 8 | |
| 9 | # only use data after 1st of june |
| 10 | swiss = swiss[swiss['date'] >= '2020-06-01'] |
| 11 | |
| 12 | # only keep useful entries |
| 13 | conf_cases = swiss.loc[-swiss['ncumul_conf'].isna()].copy() |
| 14 | |
| 15 | # get all cantons |
| 16 | all_cantons = conf_cases['abbreviation_canton_and_fl'].unique() |
| 17 | all_cantons.sort() |
| 18 | |
| 19 | # calculate new cases/deaths |
| 20 | conf_cases_per_canton = conf_cases.groupby('abbreviation_canton_and_fl') |
| 21 | conf_cases['new_cases'] = conf_cases_per_canton['ncumul_conf'].diff().fillna(0) |
| 22 | conf_cases['new_deaths'] = conf_cases_per_canton['ncumul_deceased'].diff().fillna(0) |
| 23 | |
| 24 | # calculate total cases/deaths |
| 25 | conf_cases['total_cases'] = conf_cases_per_canton['new_cases'].cumsum() |
| 26 | conf_cases['total_deaths'] = conf_cases_per_canton['new_deaths'].cumsum() |
| 27 | |
| 28 | # calculate values per 100'000 residents |
| 29 | def relative_values(group): |
| 30 | canton = group.name |
| 31 | return group.div(int(canton_data.at['CH-' + canton, 'population']) / 100000) |
| 32 | |
| 33 | conf_cases['new_cases_relative'] = conf_cases_per_canton['new_cases'].apply(relative_values) |
| 34 | conf_cases['total_cases_relative'] = conf_cases_per_canton['total_cases'].apply(relative_values) |
| 35 | |
| 36 | swiss_pretty = conf_cases.loc[:, ['date', 'abbreviation_canton_and_fl', 'new_cases', 'total_cases', 'new_deaths', 'total_deaths']] |
| 1 | # calculate new cases |
| 2 | graphs = [] |
| 3 | max_new_cases = conf_cases['new_cases_relative'].max() |
| 4 | for canton in all_cantons: |
| 5 | update_for_canton = conf_cases.loc[conf_cases['abbreviation_canton_and_fl'] == canton].copy() |
| 6 | |
| 7 | update_for_canton['new_cases_relative_avg'] = update_for_canton['new_cases_relative'].rolling(window=7).mean() |
| 8 | |
| 9 | new_cases_graph = figure(title=canton, y_axis_label='new cases', y_range=[0, max_new_cases], x_axis_type='datetime') |
| 10 | new_cases_graph.line(update_for_canton['date'], update_for_canton['new_cases_relative'], line_width=1) |
| 11 | new_cases_graph.line(update_for_canton['date'], update_for_canton['new_cases_relative_avg'], line_color='red', line_width=1) |
| 12 | graphs.append(new_cases_graph) |
| 13 | |
| 14 | show(create_grid(graphs, sizing_mode='scale_width')) |
| 1 | # calculate total cases |
| 2 | graphs = [] |
| 3 | max_total_cases = conf_cases['total_cases_relative'].max() |
| 4 | for canton in all_cantons: |
| 5 | update_for_canton = conf_cases.loc[conf_cases['abbreviation_canton_and_fl'] == canton] |
| 6 | |
| 7 | total_cases_graph = figure(title=canton, y_axis_label='total cases', y_range=[0, max_total_cases], x_axis_type='datetime') |
| 8 | total_cases_graph.line(update_for_canton['date'], update_for_canton['total_cases_relative'], line_width=1) |
| 9 | graphs.append(total_cases_graph) |
| 10 | |
| 11 | show(create_grid(graphs, sizing_mode='scale_width')) |
| 1 | worldwide_pretty |
| Date | Country_Region | New_Confirmed | Confirmed | New_Deaths | Deaths | |
|---|---|---|---|---|---|---|
| 0 | 2020-02-24 | Afghanistan | 0.0 | 5.0 | 0.0 | 0.0 |
| 1 | 2020-02-25 | Afghanistan | 0.0 | 5.0 | 0.0 | 0.0 |
| 2 | 2020-02-26 | Afghanistan | 0.0 | 5.0 | 0.0 | 0.0 |
| 3 | 2020-02-27 | Afghanistan | 0.0 | 5.0 | 0.0 | 0.0 |
| 4 | 2020-02-28 | Afghanistan | 0.0 | 5.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 119753 | 2020-03-12 | occupied Palestinian territory | 0.0 | 0.0 | 0.0 | 0.0 |
| 119754 | 2020-03-14 | occupied Palestinian territory | 0.0 | 0.0 | 0.0 | 0.0 |
| 119755 | 2020-03-15 | occupied Palestinian territory | 0.0 | 0.0 | 0.0 | 0.0 |
| 119756 | 2020-03-16 | occupied Palestinian territory | 0.0 | 0.0 | 0.0 | 0.0 |
| 119757 | 2020-03-17 | occupied Palestinian territory | 0.0 | 0.0 | 0.0 | 0.0 |
119758 rows × 6 columns
| 1 | swiss_pretty |
| date | abbreviation_canton_and_fl | new_cases | total_cases | new_deaths | total_deaths | |
|---|---|---|---|---|---|---|
| 2371 | 2020-06-01 | BL | 0.0 | 0.0 | 0.0 | 0.0 |
| 2372 | 2020-06-01 | FR | 0.0 | 0.0 | 0.0 | 0.0 |
| 2373 | 2020-06-01 | GR | 0.0 | 0.0 | 0.0 | 0.0 |
| 2374 | 2020-06-01 | JU | 0.0 | 0.0 | 0.0 | 0.0 |
| 2375 | 2020-06-01 | NE | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 15584 | 2021-11-15 | TG | 1.0 | 28303.0 | 0.0 | 486.0 |
| 15585 | 2021-11-15 | UR | 80.0 | 4345.0 | 0.0 | 47.0 |
| 15586 | 2021-11-15 | BS | 51.0 | 16672.0 | 0.0 | 173.0 |
| 15587 | 2021-11-15 | AI | 44.0 | 2115.0 | 1.0 | 19.0 |
| 15588 | 2021-11-15 | ZH | 410.0 | 148324.0 | 0.0 | 1378.0 |
12404 rows × 6 columns
| 1 |