# Load UCI census and convert to json for sending to the visualization
import pandas as pd
# Column names applied to the census data when it is parsed; they are passed
# to the reader as the header for the resulting dataframe.
features = [
    "Age", "Workclass", "fnlwgt", "Education", "Education-Num",
    "Marital Status", "Occupation", "Relationship", "Race", "Sex",
    "Capital Gain", "Capital Loss", "Hours per week", "Country", "Target",
]

# Parser settings for the remote CSV file.
csv_options = {
    "names": features,     # use the list above as the column header
    "sep": r'\s*,\s*',     # comma separator, absorbing surrounding whitespace
    "engine": 'python',    # parser engine chosen to go with the regex separator
    "skiprows": [0],       # drop the first line of the file, which holds no data
    "na_values": "?",      # "?" entries are read as missing values
}

# Download the dataset and build the dataframe.
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test",
    **csv_options
)

# Pick the sprite size from the number of records: datasets with more than
# 50000 rows get the smaller sprite, since overly large sprites on big
# datasets can crash the browser.
sprite_size = 64
if len(df) > 50000:
    sprite_size = 32

# Serialize the dataframe as a JSON array with one object per record.
jsonstr = df.to_json(orient='records')