# Add the facets overview python code to the python path
import sys
sys.path.append('./python')
# Load UCI census train and test data into dataframes.
import pandas as pd
features = ["Age", "Workclass", "fnlwgt", "Education", "Education-Num", "Martial Status",
"Occupation", "Relationship", "Race", "Sex", "Capital Gain", "Capital Loss",
"Hours per week", "Country", "Target"]
train_data = pd.read_csv(
"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
names=features,
sep=r'\s*,\s*',
engine='python',
na_values="?")
test_data = pd.read_csv(
"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test",
names=features,
sep=r'\s*,\s*',
skiprows=[0],
engine='python',
na_values="?")
# Calculate the feature statistics proto from the datasets and stringify it for use in facets overview
from generic_feature_statistics_generator import GenericFeatureStatisticsGenerator
import base64
gfsg = GenericFeatureStatisticsGenerator()
proto = gfsg.ProtoFromDataFrames([{'name': 'train', 'table': train_data},
{'name': 'test', 'table': test_data}])
protostr = base64.b64encode(proto.SerializeToString()).decode("utf-8")
# Display the facets overview visualization for this data
from IPython.core.display import display, HTML
HTML_TEMPLATE = """<link rel="import" href="/nbextensions/facets-dist/facets-jupyter.html" >
<facets-overview id="elem"></facets-overview>
<script>
document.querySelector("#elem").protoInput = "{protostr}";
</script>"""
html = HTML_TEMPLATE.format(protostr=protostr)
display(HTML(html))