In this work, the objective is to understand the outbreak of COVID-19 in Nigeria. I carry out a comparison of the cases in Nigeria with other African countries; furthermore, I carry out a comparison with the worst-affected countries in Europe and America, and finally use Machine Learning and Time Series Forecasting models to give a short-term forecast.
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import datetime as dt
from datetime import timedelta
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from statsmodels.tsa.api import Holt,SimpleExpSmoothing,ExponentialSmoothing
from sklearn.metrics import mean_squared_error,r2_score
import statsmodels.api as sm
from fbprophet import Prophet
# Load the per-day COVID-19 observations and preview the first rows.
covid = pd.read_csv("../input/novel-corona-virus-2019-dataset/covid_19_data.csv")
covid.head()


def _country_rows(country):
    """Return an independent copy of the rows of ``covid`` for ``country``.

    The explicit copy matters because the per-country frames have their
    "ObservationDate" column reassigned further down; doing that on a
    filtered selection of ``covid`` raises pandas' SettingWithCopyWarning
    (currently hidden by the global warnings filter).
    """
    return covid[covid["Country/Region"] == country].copy()


# Extracting Nigeria's data
covid_nigeria = _country_rows("Nigeria")

# Extracting other countries for comparison of worst affected countries
covid_spain = _country_rows("Spain")
covid_us = _country_rows("US")
covid_italy = _country_rows("Italy")
covid_iran = _country_rows("Iran")
covid_france = _country_rows("France")
covid_uk = _country_rows("UK")

# Extracting data of other African countries
covid_ghana = _country_rows("Ghana")
covid_south_africa = _country_rows("South Africa")
covid_egypt = _country_rows("Egypt")
covid_kenya = _country_rows("Kenya")
covid_ethiopia = _country_rows("Ethiopia")
covid_senegal = _country_rows("Senegal")
# Converting the date into Datetime format
# Each per-country frame has its "ObservationDate" column parsed in place.
_country_frames = (
    covid_nigeria,
    covid_spain,
    covid_us,
    covid_italy,
    covid_iran,
    covid_france,
    covid_uk,
    covid_ghana,
    covid_south_africa,
    covid_egypt,
    covid_kenya,
    covid_ethiopia,
    covid_senegal,
)
for _frame in _country_frames:
    _frame["ObservationDate"] = pd.to_datetime(_frame["ObservationDate"])
# Grouping the data based on the Date
def _daily_totals(frame):
    """Sum Confirmed, Recovered and Deaths over all rows sharing an ObservationDate."""
    totals = {"Confirmed": 'sum', "Recovered": 'sum', "Deaths": 'sum'}
    return frame.groupby(["ObservationDate"]).agg(totals)


nigeria_datewise = _daily_totals(covid_nigeria)
spain_datewise = _daily_totals(covid_spain)
us_datewise = _daily_totals(covid_us)
italy_datewise = _daily_totals(covid_italy)
iran_datewise = _daily_totals(covid_iran)
france_datewise = _daily_totals(covid_france)
uk_datewise = _daily_totals(covid_uk)
ghana_datewise = _daily_totals(covid_ghana)
south_africa_datewise = _daily_totals(covid_south_africa)
egypt_datewise = _daily_totals(covid_egypt)
kenya_datewise = _daily_totals(covid_kenya)
ethiopia_datewise = _daily_totals(covid_ethiopia)
senegal_datewise = _daily_totals(covid_senegal)
# Adding week column to perform weekly analysis further ahead.
# DatetimeIndex.weekofyear was deprecated in pandas 1.1 and removed in 2.0.
# The ISO week number is read from each Timestamp's isocalendar() instead,
# which yields the same week numbers and works on old and new pandas alike.
_datewise_frames = (
    nigeria_datewise,
    spain_datewise,
    us_datewise,
    italy_datewise,
    iran_datewise,
    france_datewise,
    uk_datewise,
    ghana_datewise,
    south_africa_datewise,
    egypt_datewise,
    kenya_datewise,
    ethiopia_datewise,
    senegal_datewise,
)
for _daily in _datewise_frames:
    # isocalendar() -> (ISO year, ISO week, ISO weekday); index 1 is the week.
    _daily["WeekofYear"] = [stamp.isocalendar()[1] for stamp in _daily.index]
# Whole days elapsed between each observation and Nigeria's first observation;
# this integer column is the regressor for the ML models further down.
_first_observation = nigeria_datewise.index[0]
nigeria_datewise["Days Since"] = (nigeria_datewise.index - _first_observation).days
# Per-country, per-date totals over the whole dataset (dates stay as read from the CSV).
_case_sums = {"Confirmed": 'sum', "Recovered": 'sum', "Deaths": 'sum'}
grouped_country = covid.groupby(["Country/Region", "ObservationDate"]).agg(_case_sums)

# Active = confirmed cases that are neither recovered nor dead.
grouped_country["Active Cases"] = (
    grouped_country["Confirmed"]
    .sub(grouped_country["Recovered"])
    .sub(grouped_country["Deaths"])
)

# Natural-log versions of the confirmed and active counts.
for _source, _target in (("Confirmed", "log_confirmed"), ("Active Cases", "log_active")):
    grouped_country[_target] = np.log(grouped_country[_source])
# Headline figures for Nigeria, taken from the most recent row of the daily table.
_confirmed = nigeria_datewise["Confirmed"]
_recovered = nigeria_datewise["Recovered"]
_deaths = nigeria_datewise["Deaths"]
_n_days = nigeria_datewise.shape[0]

_confirmed_now = _confirmed.iloc[-1]
_recovered_now = _recovered.iloc[-1]
_deaths_now = _deaths.iloc[-1]

print("Number of Confirmed Cases", _confirmed_now)
print("Number of Recovered Cases", _recovered_now)
print("Number of Death Cases", _deaths_now)
print("Number of Active Cases", _confirmed_now - _recovered_now - _deaths_now)
print("Number of Closed Cases", _recovered_now + _deaths_now)

# Latest cumulative count divided by the number of observed days.
print("Approximate Number of Confirmed Cases per day", round(_confirmed_now / _n_days))
print("Approximate Number of Recovered Cases per day", round(_recovered_now / _n_days))
print("Approximate Number of Death Cases per day", round(_deaths_now / _n_days))

# Change between the last two observation dates.
print("Number of New Cofirmed Cases in last 24 hours are", _confirmed_now - _confirmed.iloc[-2])
print("Number of New Recoverd Cases in last 24 hours are", _recovered_now - _recovered.iloc[-2])
print("Number of New Death Cases in last 24 hours are", _deaths_now - _deaths.iloc[-2])
# Bar charts of Nigeria's active and closed cases per observation date.
# The date range in the titles is derived from the data rather than hard-coded,
# so it cannot go stale (the previous literal read "Feb. 28th - May 5rd").
_date_span = (
    nigeria_datewise.index[0].strftime("%d %b %Y")
    + " - "
    + nigeria_datewise.index[-1].strftime("%d %b %Y")
)

# Active = Confirmed - Recovered - Deaths
_active_cases = nigeria_datewise["Confirmed"] - nigeria_datewise["Recovered"] - nigeria_datewise["Deaths"]
plt.figure(figsize=(15, 5))
sns.barplot(x=nigeria_datewise.index.date, y=_active_cases)
plt.xticks(rotation=90)
plt.ylabel("Number of Cases")
plt.xlabel("Date")
plt.title("Distribution of Number of Active Cases in Nigeria From " + _date_span)

# Closed = Recovered + Deaths
_closed_cases = nigeria_datewise["Recovered"] + nigeria_datewise["Deaths"]
plt.figure(figsize=(15, 5))
sns.barplot(x=nigeria_datewise.index.date, y=_closed_cases)
plt.xticks(rotation=90)
plt.ylabel("Number of Cases")
plt.xlabel("Date")
plt.title("Distribution of Number of Closed Cases in Nigeria From " + _date_span)
# Cumulative confirmed, recovered and death counts for Nigeria on one chart.
plt.figure(figsize=(10, 5))
for _column, _marker in (("Confirmed", '*'), ("Recovered", 'o'), ("Deaths", "^")):
    # The legend label is the column name itself.
    plt.plot(nigeria_datewise[_column], label=_column, marker=_marker)
plt.xticks(rotation=90)
plt.ylabel("Number of all Cases")
plt.xlabel("Date")
plt.title("Growth of different types of cases in Nigeria")
plt.legend()
The almost exponential growth of Confirmed Cases, compared with Recovered and Death Cases, is conclusive evidence of why the number of Active Cases is increasing.
# Side-by-side panels: recovery rate (left) and mortality rate (right), each as a
# percentage of confirmed cases, with a dashed line at the mean over all dates.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
_rate_panels = (
    (ax1, "Recovered", "Recovery Rate"),
    (ax2, "Deaths", "Mortality Rate"),
)
for _axis, _column, _rate_name in _rate_panels:
    _rate = (nigeria_datewise[_column] / nigeria_datewise["Confirmed"]) * 100
    _axis.plot(_rate, label=_rate_name, linewidth=3)
    _axis.axhline(_rate.mean(), linestyle='--', color='black', label="Mean " + _rate_name)
    _axis.set_xlabel("Date")
    _axis.set_ylabel(_rate_name)
    _axis.set_title(_rate_name + " of Nigeria over Date")
    _axis.legend()
The Recovery Rate was initially very high when the number of positive (Confirmed) cases was low, and it dropped drastically as the number of cases increased. An increasing Mortality Rate together with a falling Recovery Rate is a worrying sign for Nigeria.
Increasing Mortality Rate and very slowly increasing Recovery Rate is conclusive evidence for increase in number of Closed Cases
Slightly dipping Mortality Rate and increasing recovery rate is a positive sign
Growth factor is the factor by which a quantity multiplies itself over time. The formula used is:
Formula: Every day's new (Confirmed,Recovered,Deaths) / new (Confirmed,Recovered,Deaths) on the previous day.
A growth factor above 1 indicates an increase in the corresponding cases.
A growth factor above 1 but trending downward is a positive sign, whereas a growth factor constantly above 1 is the sign of exponential growth.
A growth factor constant at 1 indicates there is no change in any kind of cases.
# Day-over-day ratio of each cumulative series (value / previous row's value).
plt.figure(figsize=(14, 6))
for _column, _noun in (("Confirmed", "Confirmed"), ("Recovered", "Recovered"), ("Deaths", "Death")):
    _series = nigeria_datewise[_column]
    _ratio = _series / _series.shift()
    plt.plot(_ratio, linewidth=3, label="Growth Factor of " + _noun + " Cases")
# Reference line at 1, i.e. no change from the previous row.
plt.axhline(1, linestyle='--', color='black', label="Baseline")
plt.legend()
plt.title("Datewise Growth Factor of different types of Cases in Nigeria")
plt.xticks(rotation=90)
The Growth Factor of Recovered Cases is constantly very close to 1, indicating that the Recovery Rate, which was high initially as discussed earlier, is now very low. The Growth Factors of Confirmed and Death Cases being well above 1 indicates considerable growth in both types of cases.
# First differences of the cumulative series; the first row has no predecessor
# and is plotted as 0.
plt.figure(figsize=(12, 6))
_daily_series = (
    ("Confirmed", "Confirmed Cases"),
    ("Recovered", "Recovered Cases"),
    ("Deaths", "Death Cases"),
)
for _column, _legend in _daily_series:
    _increase = nigeria_datewise[_column].diff().fillna(0)
    plt.plot(_increase, linewidth=3, label=_legend)
plt.ylabel("Increase in Number of Cases")
plt.xlabel("Date")
plt.title("Daily increase in different types of cases in Nigeria")
plt.xticks(rotation=90)
plt.legend()
# Weekly snapshots for Nigeria: for every distinct week (in order of first
# appearance) keep the value on the last observed day of that week.
_nigeria_weeks = list(nigeria_datewise["WeekofYear"].unique())


def _nigeria_week_end(column):
    """Return the last value of ``column`` in each of Nigeria's weeks."""
    week_of = nigeria_datewise["WeekofYear"]
    return [nigeria_datewise.loc[week_of == week, column].iloc[-1] for week in _nigeria_weeks]


nigeria_weekwise_confirmed = _nigeria_week_end("Confirmed")
nigeria_weekwise_recovered = _nigeria_week_end("Recovered")
nigeria_weekwise_deaths = _nigeria_week_end("Deaths")
# Weeks are renumbered 1, 2, 3, ... rather than using the calendar week number.
week_num_nigeria = list(range(1, len(_nigeria_weeks) + 1))
# Week-end cumulative counts for Nigeria, one line per case type.
plt.figure(figsize=(10, 5))
_weekly_lines = (
    (nigeria_weekwise_confirmed, "Weekly Growth of Confirmed Cases"),
    (nigeria_weekwise_recovered, "Weekly Growth of Recovered Cases"),
    (nigeria_weekwise_deaths, "Weekly Growth of Death Cases"),
)
for _values, _legend in _weekly_lines:
    plt.plot(week_num_nigeria, _values, linewidth=3, label=_legend)
plt.xlabel('Week Number')
plt.ylabel("Number of Cases")
# Title previously read "... in igeria" (missing "N").
plt.title("Weekly Growth of different types of Cases in Nigeria")
plt.legend()
# Mean week-over-week increase; the first week has no predecessor and counts as 0.
_weekly_series = (
    ("Confirmed", nigeria_weekwise_confirmed),
    ("Recovered", nigeria_weekwise_recovered),
    ("Death", nigeria_weekwise_deaths),
)
for _noun, _values in _weekly_series:
    _mean_increase = pd.Series(_values).diff().fillna(0).mean()
    print("Average weekly increase in number of " + _noun + " Cases", round(_mean_increase))
# Week-over-week increase in confirmed cases (left) and deaths (right) for Nigeria.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
_weekly_panels = (
    (ax1, nigeria_weekwise_confirmed, "Confirmed"),
    (ax2, nigeria_weekwise_deaths, "Death"),
)
for _axis, _values, _noun in _weekly_panels:
    _increase = pd.Series(_values).diff().fillna(0)
    sns.barplot(x=week_num_nigeria, y=_increase, ax=_axis)
    _axis.set_xlabel("Week Number")
    _axis.set_ylabel("Number of " + _noun + " Cases")
    _axis.set_title("Nigeria's Weekwise increase in Number of " + _noun + " Cases")
# Comparison table for the selected African countries, built from the latest
# daily row of each country. Row order must match n_countries.
n_countries = ["Ghana", "South Africa", "Egypt", "Kenya", "Ethiopia", "Senegal", "Nigeria"]
comp_data = pd.concat([
    ghana_datewise.iloc[[-1]],
    south_africa_datewise.iloc[[-1]],
    egypt_datewise.iloc[[-1]],
    kenya_datewise.iloc[[-1]],
    ethiopia_datewise.iloc[[-1]],
    senegal_datewise.iloc[[-1]],
    nigeria_datewise.iloc[[-1]],
])
# axis is passed by keyword: the positional form drop(labels, 1) was removed in pandas 2.0.
comp_data.drop(["Days Since", "WeekofYear"], axis=1, inplace=True)
comp_data.index = n_countries

# Percentages relative to confirmed cases.
comp_data["Mortality"] = (comp_data["Deaths"] / comp_data["Confirmed"]) * 100
comp_data["Recovery"] = (comp_data["Recovered"] / comp_data["Confirmed"]) * 100
comp_data["Survival Probability"] = (1 - (comp_data["Deaths"] / comp_data["Confirmed"])) * 100

# Notebook display of the table ordered by confirmed cases; comp_data itself is not reordered.
comp_data.sort_values(["Confirmed"], ascending=False)
# First observation date and first date with a non-zero death count, per country.
_african_frames = (
    ("South Africa", south_africa_datewise),
    ("Egypt", egypt_datewise),
    ("Nigeria", nigeria_datewise),
    ("Ghana", ghana_datewise),
    ("Senegal", senegal_datewise),
    ("Kenya", kenya_datewise),
    ("Ethiopia", ethiopia_datewise),
)

for _name, _daily in _african_frames:
    _first_seen = _daily.index[0].date()
    print(_name + " reported it's first confirm case on: ", _first_seen)

for _name, _daily in _african_frames:
    _with_deaths = _daily[_daily["Deaths"] > 0]
    print(_name + " reported it's first death case on: ", _with_deaths.index[0].date())
# Natural log of cumulative confirmed cases for the selected African countries.
plt.figure(figsize=(12, 6))
_log_lines = (
    ("South Africa", south_africa_datewise),
    ("Egypt", egypt_datewise),
    ("Ghana", ghana_datewise),
    ("Senegal", senegal_datewise),
    ("Kenya", kenya_datewise),
    ("Ethiopia", ethiopia_datewise),
    ("Nigeria", nigeria_datewise),
)
for _name, _daily in _log_lines:
    plt.plot(np.log(_daily["Confirmed"]), linewidth=3, label="Confirmed Cases " + _name)
plt.legend(loc=2)
plt.title("Confirmed Cases plot for Selected African Countries (Logarithmic Scale)")
# Mean (over all dates) mortality and recovery percentages per African country.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 5))

# Same country order as the rows of comp_data.
_ordered_frames = (
    ghana_datewise,
    south_africa_datewise,
    egypt_datewise,
    kenya_datewise,
    ethiopia_datewise,
    senegal_datewise,
    nigeria_datewise,
)


def _mean_percentage(daily, column):
    """Return the mean over dates of ``column`` as a percentage of Confirmed."""
    return ((daily[column] / daily["Confirmed"]) * 100).mean()


mean_mortality = [_mean_percentage(_daily, "Deaths") for _daily in _ordered_frames]
mean_recovery = [_mean_percentage(_daily, "Recovered") for _daily in _ordered_frames]
comp_data["Mean Mortality Rate"] = mean_mortality
comp_data["Mean Recovery Rate"] = mean_recovery

sns.barplot(x=comp_data["Mean Mortality Rate"], y=comp_data.index, ax=ax1)
ax1.set_title("Mean Mortatlity Rate of Other African Countries")
ax1.set_ylabel("Country Name")
ax1.set_xlabel("Mortality Rate")

sns.barplot(x=comp_data["Mean Recovery Rate"], y=comp_data.index, ax=ax2)
ax2.set_title("Mean Recovery Rate of Other African Countries")
ax2.set_ylabel("Country Name")
ax2.set_xlabel("Recovery Rate")
# Latest totals for the worst-affected countries alongside Nigeria.
# Row order must match country_names.
country_names = ["Spain", "US", "Italy", "Iran", "France", "UK", "Nigeria"]
country_data = pd.concat([
    spain_datewise.iloc[[-1]],
    us_datewise.iloc[[-1]],
    italy_datewise.iloc[[-1]],
    iran_datewise.iloc[[-1]],
    france_datewise.iloc[[-1]],
    uk_datewise.iloc[[-1]],
    nigeria_datewise.iloc[[-1]],
])
# axis is passed by keyword: the positional form drop(labels, 1) was removed in pandas 2.0.
country_data = country_data.drop(["Days Since", "WeekofYear"], axis=1)
country_data["Mortality"] = (country_data["Deaths"] / country_data["Confirmed"]) * 100
country_data["Recovery"] = (country_data["Recovered"] / country_data["Confirmed"]) * 100
country_data.index = country_names
country_data
# How many observation days each country spent at or below Nigeria's latest
# confirmed count (days with zero confirmed cases are not counted).
max_confirm_nigeria = nigeria_datewise["Confirmed"].iloc[-1]
_reference_countries = (
    ("Spain", spain_datewise),
    ("USA", us_datewise),
    ("Italy", italy_datewise),
    ("Iran", iran_datewise),
    ("France", france_datewise),
    ("United Kingdom", uk_datewise),
)
for _place, _daily in _reference_countries:
    _in_range = (_daily["Confirmed"] > 0) & (_daily["Confirmed"] <= max_confirm_nigeria)
    print(
        "It took",
        _daily[_in_range].shape[0],
        "days in " + _place + " to reach number of Confirmed Cases equivalent to Nigeria",
    )
# This line reports Nigeria's own data; the message previously said "India".
print(
    "It took",
    nigeria_datewise[nigeria_datewise["Confirmed"] > 0].shape[0],
    "days in Nigeria to reach",
    max_confirm_nigeria,
    "Confirmed Cases",
)
# Confirmed-case curves of the reference countries, truncated to the dates on
# which they were at or below Nigeria's latest count, plus Nigeria's full curve.
plt.figure(figsize=(12, 6))
max_confirm_nigeria = nigeria_datewise["Confirmed"].iloc[-1]
_truncated_curves = (
    ("Spain", spain_datewise),
    ("USA", us_datewise),
    ("Italy", italy_datewise),
    ("Iran", iran_datewise),
    ("France", france_datewise),
    ("UK", uk_datewise),
)
for _name, _daily in _truncated_curves:
    _below_cap = _daily[_daily["Confirmed"] <= max_confirm_nigeria]
    plt.plot(_below_cap["Confirmed"], label="Confirmed Cases " + _name, linewidth=3)
plt.plot(nigeria_datewise["Confirmed"], label="Confirmed Cases Nigeria", linewidth=3)
plt.ylabel("Number of Confirmed Cases")
plt.xlabel("Date")
plt.legend()
Weekly Comparisons
def _weekly_closing_values(daily):
    """Summarise a daily table into week-end snapshots.

    For every distinct ``WeekofYear`` value of ``daily`` (in order of first
    appearance) the value on the last observed day of that week is kept.

    Returns a 4-tuple ``(week_numbers, confirmed, recovered, deaths)`` where
    ``week_numbers`` is ``1..n`` and the other three are lists of length ``n``.
    """
    week_ids = list(daily["WeekofYear"].unique())

    def closing(column):
        # The mask is always built from the same frame it is applied to.
        return [daily.loc[daily["WeekofYear"] == week, column].iloc[-1] for week in week_ids]

    week_numbers = list(range(1, len(week_ids) + 1))
    return week_numbers, closing("Confirmed"), closing("Recovered"), closing("Deaths")


(week_num_spain, spain_weekwise_confirmed,
 spain_weekwise_recovered, spain_weekwise_deaths) = _weekly_closing_values(spain_datewise)

(week_num_us, us_weekwise_confirmed,
 us_weekwise_recovered, us_weekwise_deaths) = _weekly_closing_values(us_datewise)

(week_num_italy, italy_weekwise_confirmed,
 italy_weekwise_recovered, italy_weekwise_deaths) = _weekly_closing_values(italy_datewise)

(week_num_iran, iran_weekwise_confirmed,
 iran_weekwise_recovered, iran_weekwise_deaths) = _weekly_closing_values(iran_datewise)

(week_num_france, france_weekwise_confirmed,
 france_weekwise_recovered, france_weekwise_deaths) = _weekly_closing_values(france_datewise)

(week_num_uk, uk_weekwise_confirmed,
 uk_weekwise_recovered, uk_weekwise_deaths) = _weekly_closing_values(uk_datewise)

# Nigeria's deaths were previously selected with a mask built from uk_datewise,
# whose index does not match nigeria_datewise; the helper always masks a frame
# with its own week column.
(week_num_nigeria, nigeria_weekwise_confirmed,
 nigeria_weekwise_recovered, nigeria_weekwise_deaths) = _weekly_closing_values(nigeria_datewise)
# Week-end cumulative confirmed counts of all compared countries on one chart.
plt.figure(figsize=(12, 6))
_weekly_curves = (
    (week_num_spain, spain_weekwise_confirmed, "Spain"),
    (week_num_us, us_weekwise_confirmed, "USA"),
    (week_num_italy, italy_weekwise_confirmed, "Italy"),
    (week_num_iran, iran_weekwise_confirmed, "Iran"),
    (week_num_france, france_weekwise_confirmed, "France"),
    (week_num_uk, uk_weekwise_confirmed, "UK"),
    (week_num_nigeria, nigeria_weekwise_confirmed, "Nigeria"),
)
for _weeks, _values, _name in _weekly_curves:
    plt.plot(_weeks, _values, linewidth=3, label="Confirmed Cases " + _name)
plt.title("Weekly Growth of Confirmed Cases")
plt.xlabel("Week Number")
plt.ylabel("Number of Confirmed Cases")
plt.legend()
# 2x3 grid of week-over-week increases in confirmed cases, one panel per country.
fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(2, 3, figsize=(20, 10))
_grid_panels = (
    (ax1, week_num_spain, spain_weekwise_confirmed, "Spain"),
    (ax2, week_num_us, us_weekwise_confirmed, "USA"),
    (ax3, week_num_italy, italy_weekwise_confirmed, "Italy"),
    (ax4, week_num_iran, iran_weekwise_confirmed, "Iran"),
    (ax5, week_num_france, france_weekwise_confirmed, "France"),
    (ax6, week_num_uk, uk_weekwise_confirmed, "UK"),
)
for _axis, _weeks, _values, _name in _grid_panels:
    # x and y are passed by keyword, as in the other barplot calls in this file;
    # seaborn >= 0.12 no longer accepts them positionally.
    sns.barplot(x=_weeks, y=pd.Series(_values).diff().fillna(0), ax=_axis)
    _axis.set_title("Weekly increase in confirmed Cases in " + _name)

# Only the first panel of each row gets explicit axis labels.
for _axis in (ax1, ax4):
    _axis.set_ylabel("Number of Confirmed Cases")
    _axis.set_xlabel("Week Number")
# --- Polynomial regression on "Days Since" -> cumulative confirmed cases ---
# Chronological split: first 95% of the days for training, the rest for validation.
_split_at = int(nigeria_datewise.shape[0] * 0.95)
train_ml = nigeria_datewise.iloc[:_split_at]
valid_ml = nigeria_datewise.iloc[_split_at:]
model_scores = []

poly = PolynomialFeatures(degree=9)
# The transformer is fitted on the training days only; validation, full-range
# and future days are merely transformed (previously each call re-fitted it).
train_poly = poly.fit_transform(np.array(train_ml["Days Since"]).reshape(-1, 1))
valid_poly = poly.transform(np.array(valid_ml["Days Since"]).reshape(-1, 1))
y = train_ml["Confirmed"]
# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0 and
# removed in 1.2 -- confirm against the installed version before upgrading.
linreg = LinearRegression(normalize=True)
linreg.fit(train_poly, y)
prediction_poly = linreg.predict(valid_poly)
rmse_poly = np.sqrt(mean_squared_error(valid_ml["Confirmed"], prediction_poly))
model_scores.append(rmse_poly)
print("Root Mean Squared Error for Polynomial Regression: ", rmse_poly)

# Fitted curve over the whole observed range. The feature matrix gets its own
# name so the comp_data comparison table is no longer overwritten.
_all_days_poly = poly.transform(np.array(nigeria_datewise["Days Since"]).reshape(-1, 1))
plt.figure(figsize=(11, 6))
predictions_poly = linreg.predict(_all_days_poly)
plt.plot(nigeria_datewise["Confirmed"], label="Train Confirmed Cases", linewidth=3)
plt.plot(nigeria_datewise.index, predictions_poly, linestyle='--', label="Best Fit for Polynomial Regression", color='black')
plt.xlabel('Time')
plt.ylabel('Confirmed Cases')
plt.title("Confirmed Cases Polynomial Regression Prediction")
plt.xticks(rotation=90)
plt.legend()

# Forecast for the 17 days after the last observation.
new_date = []
new_prediction_poly = []
_last_day = nigeria_datewise["Days Since"].max()
for i in range(1, 18):
    new_date.append(nigeria_datewise.index[-1] + timedelta(days=i))
    _future_features = poly.transform(np.array(_last_day + i).reshape(-1, 1))
    new_prediction_poly.append(linreg.predict(_future_features)[0])

# model_predictions collects one forecast column per model.
model_predictions = pd.DataFrame(zip(new_date, new_prediction_poly), columns=["Date", "Polynomial Regression Prediction"])
model_predictions.head(20)
# --- Support Vector Regression (polynomial kernel) on "Days Since" ---
# Same chronological 95% / 5% split as the other models.
_svr_split = int(nigeria_datewise.shape[0] * 0.95)
train_ml = nigeria_datewise.iloc[:_svr_split]
valid_ml = nigeria_datewise.iloc[_svr_split:]


def _as_column(values):
    """Reshape ``values`` into the single-feature 2-D array expected by the model."""
    return np.array(values).reshape(-1, 1)


svm = SVR(C=1, degree=6, kernel='poly')
svm.fit(_as_column(train_ml["Days Since"]), train_ml["Confirmed"])
prediction_svm = svm.predict(_as_column(valid_ml["Days Since"]))
rmse_svm = np.sqrt(mean_squared_error(prediction_svm, valid_ml["Confirmed"]))
model_scores.append(rmse_svm)
print("Root Mean Square Error for SVR Model: ", rmse_svm)

# Fitted curve over the whole observed range.
plt.figure(figsize=(11, 6))
predictions = svm.predict(_as_column(nigeria_datewise["Days Since"]))
plt.plot(nigeria_datewise["Confirmed"], label="Train Confirmed Cases", linewidth=3)
plt.plot(nigeria_datewise.index, predictions, linestyle='--', label="Best Fit for SVR", color='black')
plt.xlabel('Time')
plt.ylabel('Confirmed Cases')
plt.title("Confirmed Cases Support Vector Machine Regressor Prediction")
plt.xticks(rotation=90)
plt.legend()

# Forecast for the 17 days after the last observation, one day at a time.
_horizon = range(1, 18)
_final_date = nigeria_datewise.index[-1]
_final_day = nigeria_datewise["Days Since"].max()
new_date = [_final_date + timedelta(days=step) for step in _horizon]
new_prediction_svm = [svm.predict(_as_column(_final_day + step))[0] for step in _horizon]
model_predictions["SVM Prediction"] = new_prediction_svm
model_predictions.head(20)
# --- Holt's linear trend model on the cumulative confirmed series ---
_holt_split = int(nigeria_datewise.shape[0] * 0.95)
model_train = nigeria_datewise.iloc[:_holt_split]
valid = nigeria_datewise.iloc[_holt_split:]
y_pred = valid.copy()

# NOTE(review): newer statsmodels releases call this argument `smoothing_trend`;
# confirm against the installed version.
_train_confirmed = np.asarray(model_train["Confirmed"])
holt = Holt(_train_confirmed).fit(smoothing_level=0.2, smoothing_slope=1.1)
y_pred["Holt"] = holt.forecast(len(valid))
_rmse_holt = np.sqrt(mean_squared_error(y_pred["Confirmed"], y_pred["Holt"]))
print("Root Mean Square Error Holt's Linear Model: ", _rmse_holt)

plt.figure(figsize=(10, 5))
plt.plot(model_train.Confirmed, label="Train Set", marker='o')
valid.Confirmed.plot(label="Validation Set", marker='*')
y_pred.Holt.plot(label="Holt's Linear Model Predicted Set", marker='^')
plt.ylabel("Confirmed Cases")
plt.xlabel("Date Time")
plt.title("Confirmed Cases Holt's Linear Model Prediction Covid-19 Nigeria")
plt.xticks(rotation=90)
plt.legend()

# The model was fitted on the training rows only, so a forecast `step` days
# past the last observation lies len(valid) + step steps ahead.
holt_new_prediction = [holt.forecast(len(valid) + step)[-1] for step in range(1, 18)]
model_predictions["Holt's Linear Model Prediction"] = holt_new_prediction
model_predictions.head(20)
# --- Holt-Winters exponential smoothing on the cumulative confirmed series ---
_hw_split = int(nigeria_datewise.shape[0] * 0.95)
model_train = nigeria_datewise.iloc[:_hw_split]
valid = nigeria_datewise.iloc[_hw_split:]
y_pred = valid.copy()

# Multiplicative trend with an additive seasonal component of period 3.
_hw_model = ExponentialSmoothing(
    np.asarray(model_train['Confirmed']),
    seasonal_periods=3,
    trend='mul',
    seasonal='add',
)
es = _hw_model.fit()
y_pred["Holt's Winter Model"] = es.forecast(len(valid))
_rmse_hw = np.sqrt(mean_squared_error(y_pred["Confirmed"], y_pred["Holt's Winter Model"]))
print("Root Mean Square Error for Holt's Winter Model: ", _rmse_hw)

plt.figure(figsize=(10, 5))
plt.plot(model_train.Confirmed, label="Train Set", marker='o')
valid.Confirmed.plot(label="Validation Set", marker='*')
y_pred["Holt's Winter Model"].plot(label="Holt's Winter Model Predicted Set", marker='^')
plt.ylabel("Confirmed Cases")
plt.xlabel("Date Time")
plt.title("Confirmed Cases Holt's Winter Model Prediction Covid-19 in Nigeria")
plt.xticks(rotation=90)
plt.legend()

# The model was fitted on the training rows only, so a forecast `step` days
# past the last observation lies len(valid) + step steps ahead.
holt_winter_new_prediction = [es.forecast(len(valid) + step)[-1] for step in range(1, 18)]
model_predictions["Holt's Winter Model Prediction"] = holt_winter_new_prediction
model_predictions.head(20)
# --- Prophet on the cumulative confirmed series ---
# Prophet expects a two-column frame: 'ds' (dates) and 'y' (values).
prophet_c = Prophet(interval_width=0.95, weekly_seasonality=True)
prophet_confirmed = pd.DataFrame({
    'ds': list(nigeria_datewise.index),
    'y': list(nigeria_datewise["Confirmed"]),
})
prophet_c.fit(prophet_confirmed)

# Observed dates extended by 17 future days.
forecast_c = prophet_c.make_future_dataframe(periods=17)
forecast_confirmed = forecast_c.copy()
confirmed_forecast = prophet_c.predict(forecast_c)

# This error is measured on the dates the model was fitted on (there is no
# hold-out split here), unlike the validation RMSE of the other models.
_observed_days = nigeria_datewise.shape[0]
_fitted = confirmed_forecast['yhat'].head(_observed_days)
_rmse_prophet = np.sqrt(mean_squared_error(nigeria_datewise["Confirmed"], _fitted))
print("Root Mean Squared Error for Prophet Model: ", _rmse_prophet)

print(prophet_c.plot(confirmed_forecast))
print(prophet_c.plot_components(confirmed_forecast))

# The last 17 rows of the forecast are the future days.
_future_rows = confirmed_forecast.tail(17)
model_predictions["Prophet's Prediction"] = list(_future_rows["yhat"])
model_predictions["Prophet's Upper Bound"] = list(_future_rows["yhat_upper"])
model_predictions.head(20)
# Ensemble forecast: row-wise mean of the five model point forecasts.
# The columns are listed explicitly so that the "Date" column (datetime, which
# makes DataFrame.mean(axis=1) raise on pandas >= 2.0) and "Prophet's Upper
# Bound" (an interval bound, not a model forecast) are left out of the average.
_model_columns = [
    "Polynomial Regression Prediction",
    "SVM Prediction",
    "Holt's Linear Model Prediction",
    "Holt's Winter Model Prediction",
    "Prophet's Prediction",
]
model_predictions["Average of Predictions Models"] = model_predictions[_model_columns].mean(axis=1)
model_predictions.head(20)
The average of the predictions from the five models, evaluated for the period from the 4th to the 20th of June, 2020, gives a cumulative count of 15494 infection cases on the 20th of June, 2020, if no measures to curtail the outbreak are put in place. The short-term forecast models presented in this work can be useful for real-time preparedness by the government and relevant authorities in Nigeria.