# Copyright 2020 Arjuna Sky Kok
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Remote CSV that all of the analysis below is computed from.
dataset_url = "https://arjunaskykok.s3-ap-southeast-1.amazonaws.com/analisa/c_level_vp_level_perusahaan_teknologi_besar_2020_12_13.csv"

# Both age columns are read with pandas' nullable "Int64" dtype, so any
# missing ages stay integer-typed instead of forcing the column to float.
dataset = pd.read_csv(
    dataset_url,
    dtype=dict.fromkeys(
        ("Prediksi Umur", "Prediksi Umur Saat Capai Posisi VP/C-level"),
        "Int64",
    ),
)

# Preview of the first rows (only rendered in an interactive session).
dataset.head()
# String constants holding dataset column labels. Code further down indexes
# `dataset` with several of them; the remaining ones presumably name other
# columns of the same CSV.

# Company / person identification.
company_str, name_str, linkedin_str = "Perusahaan", "Nama", "Linkedin"

# Category and position level.
category_str, vp_str, c_str = "Kategori", "VP-level", "C-level"

# Predicted ages.
age_str, age_c_vp_str = (
    "Prediksi Umur",
    "Prediksi Umur Saat Capai Posisi VP/C-level",
)

# Education, one (university, major, country) triple per S1/S2/S3 level.
s1_uni_str, s1_major_str, s1_country_str = (
    "Universitas S1",
    "Jurusan S1",
    "Negara S1",
)
s2_uni_str, s2_major_str, s2_country_str = (
    "Universitas S2",
    "Jurusan S2",
    "Negara S2",
)
s3_uni_str, s3_major_str, s3_country_str = (
    "Universitas S3",
    "Jurusan S3",
    "Negara S3",
)

# Education country, normalized and general variants.
education_country_normalized_str, education_country_str = (
    "Normalisasi Negara Pendidikan",
    "Negara Pendidikan Secara Umum",
)

# Sex, work history and predicted citizenship.
sex_str, working_overseas_str, hotshot_companies_str, citizen_str = (
    "Seks",
    "Kerja di Luar Negeri Sebelumnya",
    "Pernah Kerja di Perusahaan Ternama",
    "Prediksi Kewarganegaraan",
)
# Remove every row whose category is "Pendiri" (Indonesian for "founder").
# The drop mutates `dataset` in place, so all statistics computed afterwards
# exclude those rows.
is_founder = dataset[category_str] == "Pendiri"
dataset.drop(index=dataset[is_founder].index, inplace=True)
# Number of rows per value of the sex column.
dataset[sex_str].value_counts()

# Pie chart of the sex split ("Laki-laki" = male, "Perempuan" = female).
# NOTE(review): the counts are hard-coded, presumably transcribed from the
# value_counts() output above -- re-check them if the dataset changes.
sex_counts = {"Laki-laki": 141, "Perempuan": 52}
labels = list(sex_counts)
numbers = list(sex_counts.values())
plt.pie(numbers, labels=labels)
plt.show()
# Mean of the two age columns over all remaining rows.
dataset[age_str].mean()
dataset[age_c_vp_str].mean()

# Number of rows per value of the general education-country column.
dataset[education_country_str].value_counts()

# Bar chart of graduates by where they studied: domestic, overseas
# non-America, America.
# NOTE(review): the counts are hard-coded, presumably transcribed from the
# value_counts() output above -- re-check them if the dataset changes.
education_counts = {
    "Lulusan Dalam Negeri": 67,
    "Lulusan Luar Negeri Non-Amerika": 74,
    "Lulusan Amerika": 46,
}
labels = list(education_counts)
numbers = list(education_counts.values())
barlist = plt.bar(labels, numbers)
# One colour per bar, in label order: red, green, blue.
for bar_patch, bar_color in zip(barlist, ("r", "g", "b")):
    bar_patch.set_color(bar_color)
plt.xticks(rotation=15)
plt.show()
# S1 universities of the rows whose S1 country is "Indonesia".
dataset.loc[dataset[s1_country_str] == "Indonesia", s1_uni_str].value_counts()

# Ten most frequent S2 universities, then ten most frequent S1 universities.
dataset[s2_uni_str].value_counts().head(10)
dataset[s1_uni_str].value_counts().head(10)

# Most frequent S1 and S2 countries among the rows whose general education
# country is "LN Non-Amerika".
non_us_overseas = dataset[education_country_str] == "LN Non-Amerika"
dataset.loc[non_us_overseas, s1_country_str].value_counts().head(10)
dataset.loc[non_us_overseas, s2_country_str].value_counts().head(10)

# Five most frequent predicted citizenships.
dataset[citizen_str].value_counts().head(5)

# Number of rows per value of the worked-overseas column.
dataset[working_overseas_str].value_counts()
# Sex split restricted to rows with a non-null value in the C-level column.
has_c_level = dataset[c_str].notna()
dataset.loc[has_c_level, sex_str].value_counts()

# Pie chart of that split ("Laki-laki" = male, "Perempuan" = female).
# NOTE(review): the counts are hard-coded, presumably transcribed from the
# value_counts() output above -- re-check them if the dataset changes.
sex_counts = {"Laki-laki": 26, "Perempuan": 5}
labels = list(sex_counts)
numbers = list(sex_counts.values())
plt.pie(numbers, labels=labels)
plt.show()
# Mean age and education-country counts, restricted to rows with a non-null
# value in the C-level column.
has_c_level = dataset[c_str].notna()
dataset.loc[has_c_level, age_str].mean()
dataset.loc[has_c_level, education_country_str].value_counts()

# Bar chart of those graduates by where they studied: domestic, overseas
# non-America, America.
# NOTE(review): the counts are hard-coded, presumably transcribed from the
# value_counts() output above -- re-check them if the dataset changes.
education_counts = {
    "Lulusan Dalam Negeri": 6,
    "Lulusan Luar Negeri Non-Amerika": 13,
    "Lulusan Amerika": 10,
}
labels = list(education_counts)
numbers = list(education_counts.values())
barlist = plt.bar(labels, numbers)
# One colour per bar, in label order: red, green, blue.
for bar_patch, bar_color in zip(barlist, ("r", "g", "b")):
    bar_patch.set_color(bar_color)
plt.xticks(rotation=15)
plt.show()
# Rows that have a non-null C-level value AND whose general education country
# is "Indonesia". Both conditions are combined into a single mask built from
# `dataset` itself: filtering in two chained steps would apply a full-length
# boolean Series to an already-filtered frame, which makes pandas emit
# "Boolean Series key will be reindexed to match DataFrame index".
dataset[dataset[c_str].notna() & (dataset[education_country_str] == "Indonesia")]
# Sex split restricted to rows with a non-null value in the VP-level column.
has_vp_level = dataset[vp_str].notna()
dataset.loc[has_vp_level, sex_str].value_counts()

# Pie chart of that split ("Laki-laki" = male, "Perempuan" = female).
# NOTE(review): the counts are hard-coded, presumably transcribed from the
# value_counts() output above -- re-check them if the dataset changes.
sex_counts = {"Laki-laki": 116, "Perempuan": 47}
labels = list(sex_counts)
numbers = list(sex_counts.values())
plt.pie(numbers, labels=labels)
plt.show()
# Mean age and education-country counts, restricted to rows with a non-null
# value in the VP-level column.
has_vp_level = dataset[vp_str].notna()
dataset.loc[has_vp_level, age_str].mean()
dataset.loc[has_vp_level, education_country_str].value_counts()

# Bar chart of those graduates by where they studied: domestic, overseas
# non-America, America.
# NOTE(review): the counts are hard-coded, presumably transcribed from the
# value_counts() output above -- re-check them if the dataset changes.
education_counts = {
    "Lulusan Dalam Negeri": 61,
    "Lulusan Luar Negeri Non-Amerika": 62,
    "Lulusan Amerika": 36,
}
labels = list(education_counts)
numbers = list(education_counts.values())
barlist = plt.bar(labels, numbers)
# One colour per bar, in label order: red, green, blue.
for bar_patch, bar_color in zip(barlist, ("r", "g", "b")):
    bar_patch.set_color(bar_color)
plt.xticks(rotation=15)
plt.show()