Published on
481 words · 3 min read · ––– views

Python basic data visualization

Python basic data visualization
#Related Topics:

Load Packages

python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set(style='white', color_codes=True)
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

Import a CSV file from the local machine

python
# Load the dataset from gapminder.csv, resolved relative to the current working directory.
df=pd.read_csv('gapminder.csv')
# Alternative: an absolute Windows path with every backslash escaped ...
#df=pd.read_csv('c:\\Users\\faisal\\Desktop\\Python\\Lesson-2\\gapminder.csv')
# ... or the same path as a raw string (r'...'), so the backslashes need no escaping.
#df=pd.read_csv(r'c:\Users\faisal\Desktop\Python\Lesson-2\gapminder.csv')
python
# Preview the first rows of the DataFrame to check that the file loaded as expected.
df.head()
country year infant_mortality life_expectancy fertility \
0 Albania 1960 115.40 62.87 6.19
1 Algeria 1960 148.20 47.50 7.65
2 Angola 1960 208.00 35.98 7.32
3 Antigua and Barbuda 1960 NaN 62.97 4.43
4 Argentina 1960 59.87 65.39 3.11
population gdp continent region
0 1636054.0 NaN Europe Southern Europe
1 11124892.0 1.382815e+10 Africa Northern Africa
2 5270844.0 NaN Africa Middle Africa
3 54681.0 NaN Americas Caribbean
4 20619075.0 1.083220e+11 Americas South America
python
# Summarise the DataFrame: per-column non-null counts and dtypes, plus memory usage.
df.info()
text
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10545 entries, 0 to 10544
Data columns (total 9 columns):
country 10545 non-null object
year 10545 non-null int64
infant_mortality 9092 non-null float64
life_expectancy 10545 non-null float64
fertility 10358 non-null float64
population 10360 non-null float64
gdp 7573 non-null float64
continent 10545 non-null object
region 10545 non-null object
dtypes: float64(5), int64(1), object(3)
memory usage: 741.5+ KB
python
# Convert the three text (object) columns to the pandas `category` dtype.
for col in ('country', 'continent', 'region'):
    df[col] = df[col].astype('category')
python
# Re-run the summary to confirm the dtype change of country, continent and region.
df.info()
text
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10545 entries, 0 to 10544
Data columns (total 9 columns):
country 10545 non-null category
year 10545 non-null int64
infant_mortality 9092 non-null float64
life_expectancy 10545 non-null float64
fertility 10358 non-null float64
population 10360 non-null float64
gdp 7573 non-null float64
continent 10545 non-null category
region 10545 non-null category
dtypes: category(3), float64(5), int64(1)
memory usage: 543.0 KB
python
# Distribution of life expectancy across all rows.
# NOTE(review): sns.distplot is deprecated in seaborn >= 0.11; when upgrading,
# consider sns.histplot(df['life_expectancy'], kde=True) instead — confirm the
# installed seaborn version first.
d = sns.distplot(df['life_expectancy'])
python
# Same distribution plot, but with the histogram limited to 10 bins.
d = sns.distplot(df['life_expectancy'],bins=10)
python
# Scatterplot: fertility vs. life expectancy for a single year, coloured by continent.
# plt.figure(figsize=(10, 7))  # uncomment to resize the plot
df2 = df[df.year == 1962]  # keep only the rows for 1962
s = sns.scatterplot(
    data=df2,
    x='fertility',
    y='life_expectancy',
    hue='continent',  # colour each point by its continent
)
# Anchor the legend's upper-left corner just right of the axes so it does not cover points.
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
plt.ylabel("Life Expectancy")  # y label (spelling corrected from "Expentency")
plt.xlabel("Fertility")  # x label
text
Text(0.5, 0, 'Fertility')
python
# Linear model plot: fertility vs. life expectancy in 1962 with a fitted regression line.
df2 = df[df.year == 1962]  # keep only the rows for 1962
# `height` is the current name of the old `size` keyword: seaborn 0.9 renamed it,
# and recent releases no longer accept `size` in lmplot.
l = sns.lmplot(
    data=df2,
    x='fertility',
    y='life_expectancy',
    height=7,  # facet height in inches
    aspect=1,  # width = aspect * height, i.e. a square facet
    scatter_kws={"s": 100},  # marker size passed through to the scatter layer
)
plt.ylabel("Life Expectancy")  # y label (spelling corrected from "Expentency")
plt.xlabel("Fertility")  # x label
text
Text(0.5, 8.96000000000003, 'Fertility')
python
# Boxplot: spread of life expectancy within each continent, over all rows.
plt.figure(figsize=(10, 7))  # enlarge the figure; drop this line for the default size
bp = sns.boxplot(
    x='continent',
    y='life_expectancy',
    data=df,
)
python
# Joint plot: fertility vs. life expectancy in 1962, with each variable's
# distribution drawn along the margins.
df2 = df.loc[df.year == 1962]  # keep only the rows for 1962
j = sns.jointplot(
    x='fertility',
    y='life_expectancy',
    data=df2,
    color='g',
)
python
# Same joint plot with a regression fit (kind='reg'); reuses df2 (the 1962 rows) from the previous cell.
j = sns.jointplot(data=df2, x='fertility',y='life_expectancy', color='g', kind='reg')
python
# Same joint plot with hexagonal bins (kind='hex'); reuses df2 (the 1962 rows) from an earlier cell.
j = sns.jointplot(data=df2, x='fertility',y='life_expectancy', color='g', kind='hex')
# Re-apply the seaborn theme with the same arguments as the setup cell.
# NOTE(review): this runs after the plot is created, so presumably it only
# affects later figures — confirm it is intentionally placed here.
sns.set(style='white', color_codes=True)
python
# Histogram of life expectancy over all rows, drawn with matplotlib using 5 bins.
# plt.hist returns a (counts, bin_edges, patches) tuple, which the notebook echoes below.
plt.hist(df['life_expectancy'], bins=5)
text
(array([ 6., 264., 1984., 3936., 4355.]),
array([13.2 , 27.34, 41.48, 55.62, 69.76, 83.9 ]),
<a list of 5 Patch objects>)
python
# List the category labels of the categorical `continent` column.
df.continent.cat.categories

Index(['Africa', 'Americas', 'Asia', 'Europe', 'Oceania'], dtype='object')

python
# Distinct values of the `continent` column.
df.continent.unique()  # SQL analogue: SELECT DISTINCT column_name FROM table
text
[Europe, Africa, Americas, Asia, Oceania]
Categories (5, object): [Europe, Africa, Americas, Asia, Oceania]
python
# Stacked histogram: life expectancy split by continent, one stacked layer per continent.
continents = ['Africa', 'Americas', 'Asia', 'Europe', 'Oceania']
plt.figure(figsize=(10, 7))
plt.hist(
    # One Series of life expectancies per continent, in the same order as the labels.
    [df.loc[df.continent == name, 'life_expectancy'] for name in continents],
    bins=15,
    label=continents,
    stacked=True,
)
plt.legend()
plt.show()