# Source: www.juanbarrios.com
import pandas as pd
import seaborn as sb
from pandas.plotting import table
import numpy as np
import six
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.dates import date2num, num2date
from matplotlib import dates as mdates
from matplotlib import ticker
from matplotlib.colors import ListedColormap
from matplotlib.patches import Patch
from matplotlib import animation
plt.style.use("ggplot")# for pretty graphs
from IPython.display import clear_output
import ftplib
import os
import math
from sklearn.cluster import KMeans
import scipy.cluster.hierarchy as sch
from sklearn.cluster import AgglomerativeClustering
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Download the Our World in Data COVID-19 table (requires network access).
data2 = pd.read_csv(
    'https://covid.ourworldindata.org/data/owid-covid-data.csv'
)
# Convert the 'date' column from YYYY-MM-DD strings to datetime values.
data2['date'] = pd.to_datetime(data2['date'], format='%Y-%m-%d')
# Snapshot: only the rows carrying the most recent date found in the table.
today_data = data2[data2['date'] == data2['date'].max()]
# Sanity check: evaluates to True when no location appears more than once in
# the snapshot. The result is not stored; it is only visible when this is the
# last expression of a notebook cell.
today_data.duplicated(subset='location').sum() == 0
# Exploratory 2x2 grid of bar charts built from the latest-date snapshot
# (`today_data`). Each panel ranks ten locations by one indicator.
fig, axs = plt.subplots(2, 2, figsize=(15, 15))

# Population density: ten locations with the highest values.
top10_population = (
    today_data[['location', 'population_density']]
    .sort_values('population_density', ascending=False)
    .head(10)
)
plt1 = sns.barplot(x='location', y='population_density', data=top10_population, ax=axs[0, 0])
plt1.set(xlabel='', ylabel='Population density')

# Extreme poverty: ten locations with the highest values.
top10_total_fer = (
    today_data[['location', 'extreme_poverty']]
    .sort_values('extreme_poverty', ascending=False)
    .head(10)
)
plt1 = sns.barplot(x='location', y='extreme_poverty', data=top10_total_fer, ax=axs[0, 1])
plt1.set(xlabel='', ylabel='Extreme Poverty')

# Life expectancy: ten locations with the lowest values.
bottom10_life_expec = (
    today_data[['location', 'life_expectancy']]
    .sort_values('life_expectancy', ascending=True)
    .head(10)
)
plt1 = sns.barplot(x='location', y='life_expectancy', data=bottom10_life_expec, ax=axs[1, 0])
plt1.set(xlabel='', ylabel='Life Expectancy')

# GDP per capita: ten locations with the lowest values.
bottom10_gdpp = (
    today_data[['location', 'gdp_per_capita']]
    .sort_values('gdp_per_capita', ascending=True)
    .head(10)
)
plt1 = sns.barplot(x='location', y='gdp_per_capita', data=bottom10_gdpp, ax=axs[1, 1])
plt1.set(xlabel='', ylabel='GDP per capita')

# Rotate the x tick labels on every panel so long location names do not
# overlap. The loop body must be indented; without it the file does not parse.
for ax in fig.axes:
    ax.tick_params(axis='x', labelrotation=90)

plt.tight_layout()
# No extension given, so matplotlib picks the output format from its
# `savefig.format` setting.
plt.savefig('eda')
plt.show()
# Heatmap of pairwise Pearson correlations between the numeric columns of the
# full dataset (`data2`).
colormap = plt.cm.viridis
plt.figure(figsize=(12, 12))
plt.title('Correlación de Pearson para los atributos', y=1.05, size=15)
# Restrict to numeric columns before correlating: `data2` also holds string
# and datetime columns, and DataFrame.corr() no longer drops those silently
# on pandas >= 2.0 (it raises instead). select_dtypes works on old and new
# pandas alike.
sb.heatmap(
    data2.select_dtypes(include='number').corr(),
    linewidths=0.1,
    vmax=1.0,
    square=True,
    cmap=colormap,
    linecolor='white',
    annot=True,
)