Copyright 2020 Arjuna Sky Kok
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Name of the estimated-birth-year column; it is read as nullable Int64 so
# that rows without a year do not turn the whole column into floats.
thn = "Tahun Lahir (Perkiraan)"

# Load the founder dataset straight from its public CSV URL.
dataset_url = "https://arjunaskykok.s3-ap-southeast-1.amazonaws.com/pendiri/east_ventures/2020-10-06_east-ventures.csv"
dataset = pd.read_csv(dataset_url, dtype={thn: "Int64"})
dataset.head()

# Frequency of each value in the "Seks" column.
dataset["Seks"].value_counts()

# NOTE(review): the pie slices are hard-coded rather than taken from the
# value_counts() call above -- presumably copied from its output; confirm
# they still match the data.
numbers = [153, 18]
labels = ["Laki-laki", "Perempuan"]
plt.pie(numbers, labels=labels)
plt.show()

# For every "Perintis" group, count its non-null "Situs" rows, then tally
# how many groups share each count.
dataset.groupby("Perintis")["Situs"].count().value_counts()


def _count_born_between(after, until):
    """Count non-null "Pendiri" entries with after < birth year <= until."""
    birth_year = dataset[thn]
    in_window = (birth_year > after) & (birth_year <= until)
    return dataset[in_window]["Pendiri"].count()


# Each variable is named after the exclusive lower bound of its ten-year
# window, e.g. thn_1960 covers birth years 1961 through 1970.
thn_1960 = _count_born_between(1960, 1970)
thn_1970 = _count_born_between(1970, 1980)
thn_1980 = _count_born_between(1980, 1990)
thn_1990 = _count_born_between(1990, 2000)
[thn_1960, thn_1970, thn_1980, thn_1990]
Semua angka di atas hanyalah perkiraan, karena tahun lahir para pendiri juga merupakan perkiraan.
# Column holding the normalized country-of-education value for each row.
lulusan = "Negara Pendidikan Dinormalisasi"

# Notebook-style display of the distinct values found in that column.
pd.unique(dataset[lulusan])
def convert_singapura_to_ln(x):
    """Relabel "Singapura" as "Luar Negeri Non-Amerika" in one cell value.

    Args:
        x: A single cell value; a string, or a missing-value marker
            (NaN, None, ...) for an empty cell.

    Returns:
        ``x`` with every "Singapura" substring replaced when ``x`` is a
        string; otherwise ``x`` itself, unchanged.
    """
    # Test for str instead of `x is not np.nan`: an identity check against
    # np.nan is False for other missing-value objects (float("nan"), None,
    # pd.NA), which would then fail on .replace().
    if isinstance(x, str):
        return x.replace("Singapura", "Luar Negeri Non-Amerika")
    return x
# Rewrite the column element by element so that "Singapura" entries are
# relabelled by convert_singapura_to_ln.
dataset[lulusan] = dataset[lulusan].map(convert_singapura_to_ln)
lulusan = "Negara Pendidikan Dinormalisasi"
ina = "Indonesia"
ln = "Luar Negeri Non-Amerika"
amrik = "Amerika"

# Private snapshots of the labels, so that rebinding the public names later
# in the session cannot change what the predicates below look for.
_INA, _LN, _AMRIK = ina, ln, amrik

# Share prefixes that qualify a label, in addition to the exact
# "<label> 100%" value.
# NOTE(review): 80 is not listed -- presumably that share never occurs in
# the data; confirm.
_SHARE_PREFIXES = ("60", "50", "70", "90")


def _has_listed_share(x, label):
    """Return True if string ``x`` gives ``label`` one of the listed shares.

    ``x`` qualifies when it equals "<label> 100%" exactly, or contains
    "<label> <share>" for any share in ``_SHARE_PREFIXES``.
    """
    if x == f"{label} 100%":
        return True
    return any(f"{label} {share}" in x for share in _SHARE_PREFIXES)


def is_ina(x):
    """Return True if ``x`` is a string giving "Indonesia" a listed share.

    Anything that is not a string (NaN, None, ...) yields False.
    """
    # isinstance instead of `x is np.nan`: the identity check misses other
    # missing-value objects, which would then raise on the `in` test.
    return isinstance(x, str) and _has_listed_share(x, _INA)


def is_ln(x):
    """Return True if ``x`` gives "Luar Negeri Non-Amerika" a listed share.

    Anything that is not a string (NaN, None, ...) yields False.
    """
    return isinstance(x, str) and _has_listed_share(x, _LN)


def is_amrik(x):
    """Return True if ``x`` is a string giving "Amerika" a listed share.

    Anything that is not a string (NaN, None, ...) yields False.
    """
    # "Luar Negeri Non-Amerika 60" also contains "Amerika 60", so every
    # value mentioning "Luar" is rejected up front.
    # NOTE(review): this also rejects a value that lists a qualifying
    # "Amerika" share next to a "Luar Negeri Non-Amerika" share -- confirm
    # that is intended.
    if not isinstance(x, str) or "Luar" in x:
        return False
    return _has_listed_share(x, _AMRIK)
# Add one boolean flag column per education bucket, computed from the
# normalized education-country column.
dataset["lulusan_dalam_negeri"] = dataset[lulusan].apply(is_ina)
dataset["lulusan_luar_negeri_non_amerika"] = dataset[lulusan].apply(is_ln)
dataset["lulusan_amerika"] = dataset[lulusan].apply(is_amrik)

# Summing a boolean column counts its True rows.
dlm_negeri = dataset["lulusan_dalam_negeri"].values.sum()
ln_negeri = dataset["lulusan_luar_negeri_non_amerika"].values.sum()
# Kept under its own name: `amrik` holds the "Amerika" label used by the
# is_amrik() predicate, so rebinding it to a count would corrupt that label
# for any later use.
amrik_negeri = dataset["lulusan_amerika"].values.sum()
[dlm_negeri, ln_negeri, amrik_negeri]

labels = ["Lulusan Dalam Negeri", "Lulusan Luar Negeri Non-Amerika", "Lulusan Amerika"]
# Plot the counts computed above rather than hard-coded copies of them, so
# the chart cannot drift away from the data.
numbers = [dlm_negeri, ln_negeri, amrik_negeri]
barlist = plt.bar(labels, numbers)
# One colour per bucket: red, green, blue.
for bar, color in zip(barlist, ("r", "g", "b")):
    bar.set_color(color)
plt.xticks(rotation=15)
plt.show()
# "Universitas S1" frequencies, first for rows whose "Negara S1" is
# Indonesia, then for rows flagged as domestically educated.
dataset.loc[dataset["Negara S1"] == "Indonesia", "Universitas S1"].value_counts()
dataset.loc[dataset["lulusan_dalam_negeri"], "Universitas S1"].value_counts()

# The same for the United States, plus the "Universitas S2" frequencies of
# the rows flagged as US-educated.
dataset.loc[dataset["Negara S1"] == "Amerika Serikat", "Universitas S1"].value_counts()
dataset.loc[dataset["lulusan_amerika"], "Universitas S1"].value_counts()
dataset.loc[dataset["lulusan_amerika"], "Universitas S2"].value_counts()

# Rows flagged as educated abroad outside the US.
dataset.loc[dataset["lulusan_luar_negeri_non_amerika"], "Universitas S1"].value_counts()

# Rows whose "Negara S1" is neither Indonesia nor the United States (rows
# with a missing country are included, as they match neither name).
s1_elsewhere = ~dataset["Negara S1"].isin(["Indonesia", "Amerika Serikat"])
dataset.loc[s1_elsewhere, "Universitas S1"].value_counts()
dataset.loc[s1_elsewhere, "Negara S1"].value_counts()

# Frequency of each founder category.
dataset["Kategori Pendiri"].value_counts()

# Number of CEOs in the "Bisnis" and "Teknikal" founder categories.
is_ceo = dataset["Jabatan"] == "CEO"
ceo_bisnis = dataset.loc[is_ceo & (dataset["Kategori Pendiri"] == "Bisnis"), "Pendiri"].count()
ceo_teknikal = dataset.loc[is_ceo & (dataset["Kategori Pendiri"] == "Teknikal"), "Pendiri"].count()
[ceo_bisnis, ceo_teknikal]