def MergeData(df, countries):
    """Merge the epidemic dataset with the country geography dataset.

    Args:
        df: Epidemic dataset. Must contain the columns 'Country/Region',
            'Lat' and 'Long'; the remaining numeric columns are summed
            per country.
        countries: Country geography dataset. Must contain the columns
            'SOVEREIGNT' and 'geometry'; every other column is discarded.

    Returns:
        The result of right-joining the reduced geography dataset onto the
        per-country epidemic totals (matching 'SOVEREIGNT' against the
        'Country/Region' group index), so every country present in the
        epidemic data is kept.

    Neither input is modified; all renaming happens on local copies.
    """
    # Names in the geography dataset rewritten to match the epidemic
    # dataset's naming as far as possible ('Taiwan' is merged into 'China').
    geo_renames = {
        'Taiwan': 'China',
        'United States of America': 'US',
        'South Korea': 'Korea, South',
        'Vatican': 'Holy See',
        'eSwatini': 'Eswatini',
        'United Republic of Tanzania': 'Tanzania',
    }

    # Drop the latitude/longitude columns: the geography dataset already
    # carries the coordinates. ``drop`` returns a new frame, so the
    # assignment below never touches the caller's object.
    df = df.drop(columns=['Lat', 'Long'])
    # Plain assignment instead of chained ``inplace=True``, which is
    # unreliable (and a no-op under pandas Copy-on-Write).
    df['Country/Region'] = df['Country/Region'].replace('Taiwan*', 'China')

    # Some countries are reported per province/state, so their rows are
    # scattered; collapse them into one row per country. ``numeric_only``
    # keeps text columns (e.g. a province name) out of the sum.
    df = df.groupby('Country/Region').sum(numeric_only=True)

    # Simplify the geography dataset: only the sovereign name and the
    # geometry column are needed. Column order is preserved.
    keep = ('SOVEREIGNT', 'geometry')
    countries = countries.drop(
        columns=[col for col in countries.columns if col not in keep]
    )
    countries['SOVEREIGNT'] = countries['SOVEREIGNT'].replace(geo_renames)

    # Merge the two datasets.
    return countries.join(df, how='right', on='SOVEREIGNT')
现在就可以简单地用这个合并后的数据集绘制某一天的世界疫情图看看:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
# Load the two inputs, then merge them with MergeData.
df = getData()
countries = getGeoData()
mergeData = MergeData(df, countries)