Python basic data visualization

Updated by Faisal Akbar 3 min read
Table of contents

Load Packages

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns; sns.set(style='white', color_codes=True)
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

Import a CSV file from the local machine:

# Read the gapminder dataset; the relative path is resolved against the
# current working directory.
csv_path = 'gapminder.csv'
df = pd.read_csv(csv_path)

# To load from an absolute Windows path instead, either escape the
# backslashes or use a raw string:
#   df = pd.read_csv('c:\\Users\\faisal\\Desktop\\Python\\Lesson-2\\gapminder.csv')
#   df = pd.read_csv(r'c:\Users\faisal\Desktop\Python\Lesson-2\gapminder.csv')
df.head()  # preview the first rows
               country  year  infant_mortality  life_expectancy  fertility  \
0              Albania  1960            115.40            62.87       6.19
1              Algeria  1960            148.20            47.50       7.65
2               Angola  1960            208.00            35.98       7.32
3  Antigua and Barbuda  1960               NaN            62.97       4.43
4            Argentina  1960             59.87            65.39       3.11

   population           gdp continent           region
0   1636054.0           NaN    Europe  Southern Europe
1  11124892.0  1.382815e+10    Africa  Northern Africa
2   5270844.0           NaN    Africa    Middle Africa
3     54681.0           NaN  Americas        Caribbean
4  20619075.0  1.083220e+11  Americas    South America
# Summarise each column's dtype and non-null count, plus memory usage
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10545 entries, 0 to 10544
Data columns (total 9 columns):
country             10545 non-null object
year                10545 non-null int64
infant_mortality    9092 non-null float64
life_expectancy     10545 non-null float64
fertility           10358 non-null float64
population          10360 non-null float64
gdp                 7573 non-null float64
continent           10545 non-null object
region              10545 non-null object
dtypes: float64(5), int64(1), object(3)
memory usage: 741.5+ KB
# Convert the repeated text columns to pandas' categorical dtype, then
# re-inspect the frame to see the new dtypes and memory usage.
for column in ('country', 'continent', 'region'):
    df[column] = df[column].astype('category')
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10545 entries, 0 to 10544
Data columns (total 9 columns):
country             10545 non-null category
year                10545 non-null int64
infant_mortality    9092 non-null float64
life_expectancy     10545 non-null float64
fertility           10358 non-null float64
population          10360 non-null float64
gdp                 7573 non-null float64
continent           10545 non-null category
region              10545 non-null category
dtypes: category(3), float64(5), int64(1)
memory usage: 543.0 KB
# Distribution of life expectancy across all rows (default binning)
d = sns.distplot(df.life_expectancy)

3dbccfaf39e0e8d6007a850a5e5cdfdc90926df7

# Distribution of life expectancy with the histogram split into 10 bins
d = sns.distplot(df.life_expectancy, bins=10)

632def7ab5c378e773d4304bda0bb911aacbf9e1

# Scatterplot: fertility vs. life expectancy for a single year,
# with one colour per continent.
# plt.figure(figsize=(10, 7))  # uncomment to resize the plot
df2 = df[df.year == 1962]  # keep only the rows for 1962
s = sns.scatterplot(
    data=df2,
    x='fertility',
    y='life_expectancy',
    hue='continent',  # colour the points by continent
)
# Anchor the legend's upper-left corner just past the right edge of the axes.
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
plt.ylabel("Life Expectancy")  # y label
plt.xlabel("Fertility")  # x label
Text(0.5, 0, 'Fertility')

d2557b4e9bc089ea2f728b78ba44cf70e415944e

# Linear model plot: fertility vs. life expectancy for a single year,
# drawn with seaborn's lmplot.
df2 = df[df.year == 1962]  # keep only the rows for 1962
l = sns.lmplot(
    data=df2,
    x='fertility',
    y='life_expectancy',
    height=7,  # `size` was renamed to `height` in seaborn 0.9
    aspect=1,
    scatter_kws={"s": 100},  # forwarded to the underlying scatter call
)
plt.ylabel("Life Expectancy")  # y label
plt.xlabel("Fertility")  # x label
Text(0.5, 8.96000000000003, 'Fertility')

317db54fe0428b053c57ed392496926b151ff7af

# Boxplot: life expectancy grouped by continent
figure_size = (10, 7)
plt.figure(figsize=figure_size)  # drop this line to use the default size
bp = sns.boxplot(x='continent', y='life_expectancy', data=df)

bf05fc59d3ae93742eda8f9931552616b4a3ef50

# Joint plot: fertility vs. life expectancy for a single year
is_1962 = df['year'] == 1962
df2 = df[is_1962]  # keep only the rows for 1962
j = sns.jointplot(x='fertility', y='life_expectancy', data=df2, color='g')

b8fe4d7a83b92aa3a49c74015acc47a6c7c708f7

# Joint plot of the 1962 subset using the 'reg' kind
j = sns.jointplot(
    x='fertility',
    y='life_expectancy',
    data=df2,
    kind='reg',
    color='g',
)

3f7271a156a965ec2383934af2c8977bd06c0d35

# Joint plot of the 1962 subset using the 'hex' kind
j = sns.jointplot(
    x='fertility',
    y='life_expectancy',
    data=df2,
    kind='hex',
    color='g',
)

# Re-apply the seaborn theme that was set when the packages were loaded
sns.set(style='white', color_codes=True)

3cb029028d2b713192db38f61cefa3c542cc5a9d

# Histogram of life expectancy, split into five bins
plt.hist(df.life_expectancy, bins=5)
(array([   6.,  264., 1984., 3936., 4355.]),
 array([13.2 , 27.34, 41.48, 55.62, 69.76, 83.9 ]),
 <a list of 5 Patch objects>)

92c55bf9579229301a968383ca41d48898d2dee2

# List the category labels of the categorical `continent` column
df.continent.cat.categories
Index(['Africa', 'Americas', 'Asia', 'Europe', 'Oceania'], dtype='object')
# Distinct values of the column; the SQL analogue is
# SELECT DISTINCT column_name FROM table
df.continent.unique()
[Europe, Africa, Americas, Asia, Oceania]
Categories (5, object): [Europe, Africa, Americas, Asia, Oceania]
# Stacked histogram: life expectancy with one stacked layer per continent
continents = ['Africa', 'Americas', 'Asia', 'Europe', 'Oceania']
plt.figure(figsize=(10, 7))
plt.hist(
    # One life-expectancy series per continent, in the same order as the labels
    [df[df.continent == name].life_expectancy for name in continents],
    bins=15,
    label=continents,
    stacked=True,
)
plt.legend()
plt.show()

dd58b20a0088f2ad2579850e60f8d7e13cb92ee6