import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
%matplotlib inline


# Load the three indicator tables and index each one by its country column.
# The population-growth file spells its key column 'country' (lower-case),
# while the other two files use 'Country'.
refugee_number = pd.read_csv('refugee_in_perc_wb.csv').set_index('Country')

gdp_per_capita = pd.read_csv('income_per_person_gdp.csv').set_index('Country')

population_growth = pd.read_csv('population_growth_annual_percent.csv').set_index('country')


# Print a structural summary (index, columns, dtypes, memory) of every table.
# DataFrame.info() prints its report and returns None, so this expression
# itself evaluates to (None, None, None).
population_growth.info(), gdp_per_capita.info(), refugee_number.info()

<class 'pandas.core.frame.DataFrame'>
Index: 194 entries, Afghanistan to Zimbabwe
Data columns (total 58 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   1960    191 non-null    float64
 1   1961    192 non-null    float64
 2   1962    192 non-null    float64
 3   1963    192 non-null    float64
 4   1964    192 non-null    float64
 5   1965    192 non-null    float64
 6   1966    192 non-null    float64
 7   1967    192 non-null    float64
 8   1968    192 non-null    float64
 9   1969    192 non-null    float64
 10  1970    192 non-null    float64
 11  1971    192 non-null    float64
 12  1972    192 non-null    float64
 13  1973    192 non-null    float64
 14  1974    192 non-null    float64
 15  1975    192 non-null    float64
 16  1976    192 non-null    float64
 17  1977    192 non-null    float64
 18  1978    192 non-null    float64
 19  1979    192 non-null    float64
 20  1980    192 non-null    float64
 21  1981    192 non-null    float64
 22  1982    192 non-null    float64
 23  1983    192 non-null    float64
 24  1984    192 non-null    float64
 25  1985    192 non-null    float64
 26  1986    192 non-null    float64
 27  1987    192 non-null    float64
 28  1988    192 non-null    float64
 29  1989    192 non-null    float64
 30  1990    193 non-null    float64
 31  1991    193 non-null    float64
 32  1992    193 non-null    float64
 33  1993    193 non-null    float64
 34  1994    193 non-null    float64
 35  1995    193 non-null    float64
 36  1996    194 non-null    float64
 37  1997    194 non-null    float64
 38  1998    194 non-null    float64
 39  1999    194 non-null    float64
 40  2000    194 non-null    float64
 41  2001    194 non-null    float64
 42  2002    194 non-null    float64
 43  2003    194 non-null    float64
 44  2004    194 non-null    float64
 45  2005    194 non-null    float64
 46  2006    194 non-null    float64
 47  2007    194 non-null    float64
 48  2008    194 non-null    float64
 49  2009    194 non-null    float64
 50  2010    194 non-null    float64
 51  2011    193 non-null    float64
 52  2012    193 non-null    float64
 53  2013    193 non-null    float64
 54  2014    193 non-null    float64
 55  2015    193 non-null    float64
 56  2016    193 non-null    float64
 57  2017    193 non-null    float64
dtypes: float64(58)
memory usage: 88.7+ KB
<class 'pandas.core.frame.DataFrame'>
Index: 193 entries, Afghanistan to Zimbabwe
Columns: 241 entries, 1800 to 2040
dtypes: int64(241)
memory usage: 364.1+ KB
<class 'pandas.core.frame.DataFrame'>
Index: 194 entries, Afghanistan to Zimbabwe
Data columns (total 29 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   1990    194 non-null    float64
 1   1991    194 non-null    float64
 2   1992    194 non-null    float64
 3   1993    194 non-null    float64
 4   1994    194 non-null    float64
 5   1995    194 non-null    float64
 6   1996    194 non-null    float64
 7   1997    194 non-null    float64
 8   1998    194 non-null    float64
 9   1999    194 non-null    float64
 10  2000    194 non-null    float64
 11  2001    194 non-null    float64
 12  2002    194 non-null    float64
 13  2003    194 non-null    float64
 14  2004    194 non-null    float64
 15  2005    194 non-null    float64
 16  2006    194 non-null    float64
 17  2007    194 non-null    float64
 18  2008    194 non-null    float64
 19  2009    194 non-null    float64
 20  2010    194 non-null    float64
 21  2011    194 non-null    float64
 22  2012    194 non-null    float64
 23  2013    194 non-null    float64
 24  2014    194 non-null    float64
 25  2015    194 non-null    float64
 26  2016    194 non-null    float64
 27  2017    194 non-null    float64
 28  2018    194 non-null    float64
dtypes: float64(29)
memory usage: 44.7+ KB

(None, None, None)


# Keep only the 2017 observations: every other year column is removed,
# in place, from each of the three tables.
for table in (refugee_number, gdp_per_capita, population_growth):
    unwanted_years = [year for year in table.columns if year != '2017']
    table.drop(columns=unwanted_years, inplace=True)


# Preview the first rows of the GDP table.
gdp_per_capita.head()


# Number of rows in each table that repeat an earlier row, reported in the
# order (refugee, GDP, population growth).
tuple(
    table.duplicated().sum()
    for table in (refugee_number, gdp_per_capita, population_growth)
)

(38, 16, 27)


# Total number of missing cells in each table, reported in the order
# (refugee, GDP, population growth).
tuple(
    table.isna().sum().sum()
    for table in (refugee_number, gdp_per_capita, population_growth)
)

(0, 0, 1)


# Discard, in place, every row of the population-growth table that contains
# a missing value.
population_growth.dropna(axis=0, how='any', inplace=True)


# Re-count the missing cells to confirm none are left.
population_growth.isna().to_numpy().sum()

0


# Summary statistics (count, mean, std, min, quartiles, max) of the GDP table.
gdp_per_capita.describe()


# To determine the countries with the lowest, highest and median GDP per capita:
# select the rows whose 2017 value equals the min (627), max (113000) or
# 50% quantile (11700) reported by describe().
# The GDP columns are integers, so the lookup values must be numbers too.
# String literals such as "627" only match when pandas silently coerces them,
# and may otherwise produce an empty selection.
gdp_countries = gdp_per_capita[gdp_per_capita['2017'].isin([627, 113000, 11700])]


gdp_countries

# So the three countries are as follows:
selected_countries = ['Somalia', 'Qatar', 'Sri Lanka']

# One single-column frame per indicator, restricted to the selected countries,
# with the '2017' column renamed after the indicator it holds.
refugee = refugee_number.loc[selected_countries].rename(columns={'2017': 'Refugee'})

population = population_growth.loc[selected_countries].rename(columns={'2017': 'Population'})

gdp = gdp_countries.loc[selected_countries].rename(columns={'2017': 'GDP'})

# Column-wise mean of the GDP table, reshaped into a one-row frame.
mean_gdp = gdp_per_capita.mean()
meanGDP = mean_gdp.to_frame().T


# Combine the three indicators on the country index (GDP, Refugee, Population).
total = gdp.join([refugee, population])


total


# One bar chart per indicator for the selected countries.
# The trailing semicolons stop the notebook from echoing the returned object.
total['GDP'].plot(kind='bar');


total['Population'].plot(kind='bar');


total['Refugee'].plot(kind='bar');


# Grouped bar charts that put two indicators side by side.
total.plot(kind='bar', y=['Refugee', 'Population']);


total.plot(kind='bar', y=['GDP', 'Population']);


# Scatter plot of Refugee against Population.
total.plot.scatter(x='Population', y='Refugee', color='red')
plt.show()


# Scatter plot of Refugee against GDP.
total.plot.scatter(x='GDP', y='Refugee', color='red')
plt.show()


!jupyter nbconvert "Exploration of Loan Data from Prosper.ipynb" --to html --output-dir='C:\Users\Ajeet\OneDrive\Projects\Blogging\ajeethaa.github.io'

C:\Users\Ajeet\anaconda3\lib\site-packages\traitlets\traitlets.py:2195: FutureWarning: Supporting extra quotes around Unicode is deprecated in traitlets 5.0. Use 'C:\\Users\\Ajeet\\OneDrive\\Projects\\Blogging\\ajeethaa.github.io' instead of "'C:\\Users\\Ajeet\\OneDrive\\Projects\\Blogging\\ajeethaa.github.io'" – or use CUnicode.
  warn(
[NbConvertApp] Converting notebook Exploration of Loan Data from Prosper.ipynb to html
[NbConvertApp] Writing 1192320 bytes to C:\Users\Ajeet\OneDrive\Projects\Blogging\ajeethaa.github.io\Exploration of Loan Data from Prosper.html

	2017
Country
Afghanistan	1760
Albania	11800
Algeria	13900
Andorra	49800
Angola	6040

	2017
count	193.000000
mean	18172.699482
std	19393.559289
min	627.000000
25%	3650.000000
50%	11700.000000
75%	26400.000000
max	113000.000000

Project: To find out whether a country with the highest GDP per capita and population growth hosts more refugees than a country with the lowest.¶

Table of Contents¶

Introduction¶

Data Wrangling¶

General Properties¶

Data Cleaning¶

Exploratory Data Analysis¶

How do the countries fare when they are compared based on the indicators?¶

Do richer countries host more refugees? Do countries with the lowest population growth take in more refugees to mitigate the effect of a declining population?¶

Conclusions¶

References:¶

	GDP	Refugee	Population
Country
Somalia	627	0.09980	2.92
Qatar	113000	0.00690	2.67
Sri Lanka	11700	0.00388	1.13