#Importing the packages:

import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import os
!pip install --quiet pycountry_convert
from pycountry_convert import country_alpha2_to_country_name, country_name_to_country_alpha3


# Load the raw (uncleaned) global dataset.
# Three columns are read explicitly as strings instead of letting pandas infer a dtype.
world_data = pd.read_csv(
    'The Global Dataset 14 Apr 2020.csv',
    dtype={
        'typeOfSexConcatenated': str,
        'RecruiterRelationship': str,
        'majorityStatusAtExploit': str,
    },
)


# Preview the first rows of the raw data
world_data.head()


world_data.shape

(48801, 64)


world_data.head()


# Keep only the columns used in this analysis (a subset of the original dataframe).
world_data = world_data[['CountryOfExploitation', 'yearOfRegistration', 'Datasource',
                         'gender', 'ageBroad', 'majorityStatus', 'majorityStatusAtExploit',
                         'majorityEntry', 'citizenship', 'isForcedLabour', 'isSexualExploit',
                         'isOtherExploit', 'isSexAndLabour','typeOfExploitConcatenated']]


# Treat -99 as a null value, whether it appears as text or as a number.
# `replace` is used without `inplace=True` so the result is a new, independent
# dataframe rather than an in-place mutation of a column slice of the raw data.
world_data = world_data.replace('-99', np.nan).replace(-99, np.nan)


world_data.head()


world_data.shape

(48801, 14)


# Print each column's non-null count and dtype to see how much data is missing.
world_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48801 entries, 0 to 48800
Data columns (total 14 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   CountryOfExploitation      38626 non-null  object 
 1   yearOfRegistration         48801 non-null  int64  
 2   Datasource                 48801 non-null  object 
 3   gender                     48801 non-null  object 
 4   ageBroad                   36439 non-null  object 
 5   majorityStatus             36439 non-null  object 
 6   majorityStatusAtExploit    9290 non-null   object 
 7   majorityEntry              6491 non-null   object 
 8   citizenship                48523 non-null  object 
 9   isForcedLabour             26102 non-null  float64
 10  isSexualExploit            23861 non-null  float64
 11  isOtherExploit             30938 non-null  float64
 12  isSexAndLabour             23456 non-null  float64
 13  typeOfExploitConcatenated  32627 non-null  object 
dtypes: float64(4), int64(1), object(9)
memory usage: 5.2+ MB


#Setting new plot defaults
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
sns.set_palette('Set3')
sns.set_style('darkgrid')
matplotlib.rcParams['font.size'] = 14
matplotlib.rcParams['figure.figsize'] = (9, 5)
matplotlib.rcParams['figure.facecolor'] = '#00000000'


# Select only the survivors who were exploited in the US.
us_data = world_data[world_data['CountryOfExploitation'] == 'US']


# Select only the US survivors who were registered in 2017 or 2018.
# The range is bounded on both sides (inclusive) so records from any later year
# are excluded. `.copy()` makes this an independent dataframe, so adding columns
# to it later does not write into a slice of `us_data`.
us_17_18_data = us_data[us_data['yearOfRegistration'].between(2017, 2018)].copy()


# Check a random sample to make sure the new dataframe is correctly filtered.
us_17_18_data.sample(10)


# How many survivors were reported in 2017 vs. 2018?
# A count plot tallies the number of rows (survivors) for each registration year.
# Using the dataframe index as the bar height would instead show the average
# row label per year, which says nothing about how many survivors there were.
ax = sns.countplot(data=us_17_18_data,
                   x='yearOfRegistration');


# Count of survivors for each gender value
sns.catplot(data=us_17_18_data, x='gender', kind='count', palette="RdBu");


# Survivor counts per (gender, age band); this table is also plotted by age band further down.
gender = us_17_18_data.groupby(['gender', 'ageBroad']).size().reset_index()
gender.rename(columns = {0:'Number of Survivors'}, inplace=True)

# Plotly pie chart of gender prevalence.
# The totals are counted per gender directly from the rows. Summing the
# (gender, ageBroad) table would leave out every survivor whose age band is
# missing, because groupby drops rows with a null grouping key.
gender_totals = us_17_18_data.groupby('gender').size().reset_index(name='Number of Survivors')

fig = px.pie(gender_totals,
             values = 'Number of Survivors',
             names = 'gender',
             title = 'Gender of Human Trafficking Survivors',
             color_discrete_sequence=px.colors.sequential.GnBu)
fig.show();


# At what age range did these victims become survivors?
# Number of survivors in each age band, most frequent band first.
count = us_17_18_data['ageBroad'].value_counts()
plt.plot(count.index, count.values);


# Bar chart of survivors per age band, coloured by gender.
# The age bands are listed explicitly so the x-axis runs from youngest to oldest.
age_band_order = ['0--8', '9--17', '18--20', '21--23', '24--26', '27--29', '30--38', '39--47', '48+']
fig = px.bar(
    gender,
    x='ageBroad',
    y='Number of Survivors',
    color='gender',
    color_discrete_sequence=px.colors.qualitative.D3,
    category_orders={'ageBroad': age_band_order},
)
fig.show()


# Stacked bar chart (plotly) of survivors by majority status, split by gender.

# Count the rows in each (gender, majorityStatus) group. The named aggregation
# already returns a dataframe with a 'Survivors' column, so no extra wrapper or
# helper column on `us_17_18_data` is needed.
data_bar_mg = (
    us_17_18_data.groupby(['gender', 'majorityStatus'])['majorityStatus']
    .agg(Survivors='count')
    .reset_index()
)
fig = px.bar(data_bar_mg, x="majorityStatus", y="Survivors", color="gender",
            title="Human Trafficking Demographics at Time of Survivors Status",
            labels={'majorityStatus':'Age'},
              color_discrete_sequence= px.colors.sequential.Plasma_r) #this is the plot
fig.update_traces(texttemplate='%{value}', textposition='outside') #show each bar's value on the plot
fig.update_layout(hovermode='x') #show info for the hovered x position
fig.show();


# Same kind of plotly bar chart as above, this time for majority status at first exploit.
exploit = (
    us_17_18_data
    .groupby(['gender', 'majorityStatusAtExploit'])
    .size()
    .reset_index(name='Number of Survivors')
)

fig = px.bar(
    exploit,
    x='majorityStatusAtExploit',
    y='Number of Survivors',
    color='gender',
    title="Human Trafficking Demographics at Time of First Exploit",
    labels={'majorityStatusAtExploit': 'Age'},
    color_discrete_sequence=px.colors.sequential.Plasma_r,
    category_orders={'majorityStatusAtExploit': ['Minor', 'Adult']},  # minors first on the x-axis
)
fig.update_traces(texttemplate='%{value}', textposition='outside')  # print each bar's value above it
fig.show()


# Survivors per exploitation category, drawn as a seaborn count plot
g = sns.catplot(
    data=us_17_18_data,
    x='typeOfExploitConcatenated',
    kind='count',
    palette="Set3",
    height=10,
);
g.set_axis_labels("", "Number of Survivors")
# NOTE(review): these tick labels are applied by position, so they assume the
# plot shows exactly three categories in this order -- confirm against the data.
g.set_xticklabels(["Sexual Exploit", "Labor Exploit", "Both"])
g.despine(left=True);


#Let's draw a map of where the survivors are being reported.
#First we have to get the countries into a plottable format using Python:

def get_alpha3(col):
    """Convert a country name to its 3-letter code, or NaN if it cannot be converted.

    Args:
        col: The value to convert, passed to ``country_name_to_country_alpha3``.

    Returns:
        The value returned by ``country_name_to_country_alpha3``, or ``np.nan``
        when the conversion raises an error (e.g. for missing or unknown values).
    """
    try:
        return country_name_to_country_alpha3(col)
    except Exception:
        # Best-effort conversion: any failed lookup becomes a missing value.
        # `Exception` (rather than a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate, so a long `.apply` can still be interrupted.
        return np.nan

def get_name(col):
    """Convert a 2-letter country code to a country name, or NaN if it cannot be converted.

    Args:
        col: The value to convert, passed to ``country_alpha2_to_country_name``.

    Returns:
        The value returned by ``country_alpha2_to_country_name``, or ``np.nan``
        when the conversion raises an error (e.g. for missing or unknown values).
    """
    try:
        return country_alpha2_to_country_name(col)
    except Exception:
        # Best-effort conversion: any failed lookup becomes a missing value.
        # `Exception` (rather than a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate, so a long `.apply` can still be interrupted.
        return np.nan


# Build a per-country survivor count that plotly can draw on a map.
# Step 1: overwrite the country column with the result of get_name.
world_data['CountryOfExploitation'] = world_data['CountryOfExploitation'].map(get_name)
# Step 2: store the result of get_alpha3 on that column in a new 'alpha_3' column.
world_data['alpha_3'] = world_data['CountryOfExploitation'].map(get_alpha3)
# Step 3: count rows per (country, alpha_3) pair and flatten into a dataframe with a 'Survivors' column.
exploitation_map = (
    world_data
    .groupby(['CountryOfExploitation', 'alpha_3'])['alpha_3']
    .count()
    .reset_index(name='Survivors')
)


# World map (plotly choropleth) coloured by the number of survivors per country
fig = px.choropleth(
    exploitation_map,
    locations='alpha_3',
    hover_name='CountryOfExploitation',
    color='Survivors',
    color_continuous_scale='Viridis_r',
)
fig.update_layout(
    title_text="Human Trafficking Surviors Based on Reported Country of Exploitation",
)
fig.show()


# What are the numbers? Ten countries with the most survivors, largest first.
(
    exploitation_map
    .set_index('CountryOfExploitation')[['Survivors']]
    .sort_values(by='Survivors', ascending=False)
    .head(10)
)


# The five most frequent exploitation types across the whole dataset, as a Series of counts
most_common_exploit = world_data['typeOfExploitConcatenated'].value_counts().head(5)
most_common_exploit

Sexual exploitation              15989
Forced labour                     8969
Other                             7063
Slavery and similar practices      359
Forced marriage                    168
Name: typeOfExploitConcatenated, dtype: int64


# Seaborn bar graph of the most prevalent forms of exploitation.
# Display names are looked up by the raw category value rather than assigned by
# position, so each bar keeps the right label even if the ranking changes.
# Categories without an entry here keep their raw name.
exploit_display_names = {
    'Sexual exploitation': 'Sexual Exploit',
    'Forced labour': 'Forced Labor',
    'Other': 'Other',
    'Slavery and similar practices': 'Slavery or Similar',
    'Forced marriage': 'Forced Marriage',
}
labelled_counts = most_common_exploit.rename(index=exploit_display_names)
g = sns.barplot(x=labelled_counts.index, y=labelled_counts.values)
g.tick_params(axis='x', labelrotation=80);  # rotate the long category names so they do not overlap


# Using plotly to view male vs. female survivors in a pie chart.

# Survivor counts per (gender, age band), kept under the same name as before.
gender = world_data.groupby(['gender', 'ageBroad']).size().reset_index()
gender.rename(columns = {0:'Number of Survivors'}, inplace=True)

# The pie totals are counted per gender directly from the rows. Summing the
# (gender, ageBroad) table would leave out every survivor whose age band is
# missing, because groupby drops rows with a null grouping key.
gender_totals = world_data.groupby('gender').size().reset_index(name='Number of Survivors')

fig = px.pie(gender_totals,
             values = 'Number of Survivors',
             names = 'gender',
             title = 'Gender of Human Trafficking Survivors',
             color_discrete_sequence=px.colors.sequential.RdBu)
fig.show();

	CountryOfExploitation	yearOfRegistration	Datasource	gender	ageBroad	majorityStatus	majorityStatusAtExploit	majorityEntry	citizenship	isSexualExploit	typeOfExploitConcatenated
0	NaN	2002	Case Management	Female	18--20	Adult	NaN	NaN	CO	1.0	Sexual exploitation
1	NaN	2002	Case Management	Female	18--20	Adult	NaN	NaN	CO	1.0	Sexual exploitation
2	NaN	2002	Case Management	Female	18--20	Adult	NaN	NaN	CO	1.0	Sexual exploitation
3	NaN	2002	Case Management	Female	18--20	Adult	NaN	NaN	CO	1.0	Sexual exploitation
4	NaN	2002	Case Management	Female	18--20	Adult	NaN	NaN	CO	1.0	Sexual exploitation

	CountryOfExploitation	yearOfRegistration	Datasource	gender	ageBroad	majorityStatus	majorityStatusAtExploit	majorityEntry	citizenship	isForcedLabour	isSexualExploit	isSexAndLabour	typeOfExploitConcatenated
48361	US	2018	Hotline	Female	9--17	Minor	Minor	NaN	00	NaN	NaN	NaN	NaN
40891	US	2017	Hotline	Female	27--29	Adult	NaN	NaN	US	0.0	1.0	0.0	Sexual exploitation
40123	US	2017	Hotline	Female	21--23	Adult	Minor	NaN	00	0.0	1.0	0.0	Sexual exploitation
48610	US	2018	Hotline	Male	0--8	Minor	Minor	Minor	00	0.0	1.0	0.0	Sexual exploitation
41418	US	2017	Hotline	Female	39--47	Adult	NaN	NaN	US	0.0	1.0	0.0	Sexual exploitation
46618	US	2018	Hotline	Female	30--38	Adult	NaN	NaN	00	0.0	1.0	0.0	Sexual exploitation
45355	US	2018	Hotline	Female	21--23	Adult	NaN	NaN	00	0.0	1.0	0.0	Sexual exploitation
45556	US	2018	Hotline	Female	21--23	Adult	NaN	NaN	00	0.0	1.0	0.0	Sexual exploitation
46396	US	2018	Hotline	Female	30--38	Adult	NaN	NaN	00	0.0	1.0	0.0	Sexual exploitation
47529	US	2018	Hotline	Female	9--17	Minor	Minor	NaN	00	0.0	1.0	0.0	Sexual exploitation

	Survivors
CountryOfExploitation
United States	12512
Ukraine	5399
Moldova, Republic of	4504
Russian Federation	2738
Philippines	1988
Indonesia	1777
Cambodia	1000
Malaysia	930
Ghana	544
United Arab Emirates	504

Human Trafficking, an Exploration of Survivors¶

Downloading the Dataset¶

Data Preparation and Cleaning¶

Exploratory Analysis and Visualization¶

Our new dataframe, reported US survivors of human trafficking for 2017 & 2018¶

Plotting¶

Asking and Answering Questions¶

Q1: Are survivors more likely to be adults or minors?¶

Q2: At what age did these survivors first enter human trafficking?¶

Q3: What type of exploitation has been occurring to these survivors?¶

Q4: A broader view: How do the US statistics compare to the rest of the world, based on all the data from 2002-2019?¶

Q5: Global exploitation trends, what are they?¶

Q6: Are males a minority of survivors globally?¶

Inferences and Conclusion¶

References and Future Work¶