Facebook implements OAuth 2.0 as its standard authentication mechanism, but provides a convenient way for you to get an access token for development purposes, and we'll opt to take advantage of that convenience in this notebook.
To get started, log in to your Facebook account and go to https://developers.facebook.com/tools/explorer/ to obtain an ACCESS_TOKEN, and then paste it into the code cell below.
# Paste the access token you just obtained between the quotes below, then
# execute this cell.
# Alternatively, you can go to https://developers.facebook.com/tools/access_token/
# and copy the "User Token" value from the Access Token Tool.
ACCESS_TOKEN = ''
import requests # pip install requests
import json
# Endpoint for the user who owns the access token
base_url = 'https://graph.facebook.com/me'

# Comma-separated list of the fields to retrieve
fields = 'id,name,likes.limit(10){about}'

url = '{0}?fields={1}&access_token={2}'.format(base_url, fields, ACCESS_TOKEN)

# The Graph API is HTTP-based, so this URL could be requested in a browser,
# with a command line utility like curl, or from just about any programming
# language. Click the hyperlink that appears in your notebook output
# when you execute this code cell to see for yourself...
print(url)

# Fetch the URL and decode the JSON body into Python data structures
graph_response = requests.get(url)
content = graph_response.json()

# Serialize back to indented JSON and display it
pretty_content = json.dumps(content, indent=1)
print(pretty_content)
Facebook SDK for Python API reference: http://facebook-sdk.readthedocs.io/en/v2.0.0/api.html
import facebook # pip install facebook-sdk
import json
# A helper function to pretty-print Python objects as JSON
def pp(o):
    """Print the object ``o`` serialized as JSON with a one-space indent."""
    rendered = json.dumps(o, indent=1)
    print(rendered)
# Open a connection to the Graph API, authenticated with your access token
g = facebook.GraphAPI(ACCESS_TOKEN, version='2.7')

# A few example queries follow.

# Look up the object for 'me' (the owner of the access token)
pp(g.get_object(id='me'))

# List the connections of an ID.
# Example connection names: 'feed', 'likes', 'groups', 'posts'
pp(g.get_connections('me', 'likes'))

# Search for a place near a coordinate; may require an approved app
place_search = {
    'type': 'place',
    'center': '40.749444, -73.968056',
    'fields': 'name, location',
}
pp(g.request('search', place_search))

# Search for a page's ID by name
page_search = {
    'q': 'Mining the Social Web',
    'type': 'page',
}
pp(g.request('search', page_search))

# Use the book's page ID to check its number of fans
mtsw_id = '146803958708175'
pp(g.get_object(id=mtsw_id, fields=['fan_count']))

# Query by URL: MTSW catalog link
pp(g.get_object(id='http://shop.oreilly.com/product/0636920030195.do'))

# Query by URL: PCI catalog link
pp(g.get_object(id='http://shop.oreilly.com/product/9780596529321.do'))
# NOTE: the code that uses these IDs may require the developer's app to be
# submitted for review and approved.
# See https://developers.facebook.com/docs/apps/review
# Page IDs of three popular musicians, used as the running example below.
taylor_swift_id = '19614945368'
drake_id = '83711079303'
beyonce_id = '28940545600'
# Declare a helper function for retrieving the total number of fans ('likes') a page has
def get_total_fans(page_id):
    """Return the 'fan_count' of the page with the given ID as an int.

    Requests only the 'fan_count' field of the page through the
    module-level Graph API connection ``g``.
    """
    page = g.get_object(id=page_id, fields=['fan_count'])
    return int(page['fan_count'])
# Look up the fan count of each artist's page; all three lookups happen
# before anything is printed.
tswift_fans = get_total_fans(taylor_swift_id)
drake_fans = get_total_fans(drake_id)
beyonce_fans = get_total_fans(beyonce_id)
# Report the three fan counts, one line per artist
print('Taylor Swift: {0} fans on Facebook'.format(tswift_fans))
print('Drake: {0} fans on Facebook'.format(drake_fans))
print('Beyoncé: {0} fans on Facebook'.format(beyonce_fans))
# Declare a helper function for retrieving the official feed from a given page.
def retrieve_page_feed(page_id, n_posts):
    """Return up to ``n_posts`` posts from the 'posts' connection of a page.

    Posts are collected in the order the Graph API returns them (described
    by the original author as reverse chronological). Further result pages
    are fetched by following each response's paging 'next' link until enough
    posts have been gathered or a response lacks the expected keys. The
    number of posts retrieved is printed before returning.
    """
    page = g.get_connections(page_id, 'posts')
    collected = list(page['data'])

    while len(collected) < n_posts:
        try:
            next_url = page['paging']['next']
            page = requests.get(next_url).json()
            collected += page['data']
        except KeyError:
            # A missing 'paging', 'next', or 'data' key is treated as the
            # end of the feed
            print('Reached end of feed.')
            break

    # Drop any surplus posts brought in by the last page fetched
    collected = collected[:n_posts]

    print('{} items retrieved from feed'.format(len(collected)))
    return collected
# Declare a helper function for returning the message content of a post
def get_post_message(post):
    """Return the text of a post as a single line.

    The post's 'message' is used when present; otherwise its 'story' is
    used; if the post has neither key, an empty string is returned.
    Newlines in the text are replaced with spaces.

    The previous implementation discarded the 'story' of any post that
    lacked a 'message', because the fallback for a missing 'message'
    overwrote it with ''.
    """
    if 'message' in post:
        text = post['message']
    elif 'story' in post:
        # Post has a 'story' instead of a 'message'
        text = post['story']
    else:
        # Post has neither
        text = ''
    return text.replace('\n', ' ')
# For each artist, fetch 5 items from the page feed and print a numbered,
# 50-character preview of each one
for artist in (taylor_swift_id, drake_id, beyonce_id):
    print()
    feed = retrieve_page_feed(artist, 5)
    for i, post in enumerate(feed):
        message = get_post_message(post)[:50]
        line = '{0} - {1}...'.format(i + 1, message)
        print(line)
# Measure the response to a post in terms of likes, shares, and comments
def measure_response(post_id):
    """Return the number of likes, shares, and comments on a given post
    as a measure of user engagement.

    Makes one Graph API request per metric through the module-level
    connection ``g``.

    Returns:
        A ``(likes, shares, comments)`` tuple of counts.
    """
    likes_info = g.get_object(id=post_id,
                              fields=['likes.limit(0).summary(true)'])
    likes = likes_info['likes']['summary']['total_count']

    shares_info = g.get_object(id=post_id,
                               fields=['shares.limit(0).summary(true)'])
    # NOTE(review): the Graph API appears to omit the 'shares' entry for
    # posts that have never been shared -- confirm against the Post
    # reference. Treat a missing entry as zero shares instead of raising
    # KeyError.
    shares = shares_info.get('shares', {}).get('count', 0)

    comments_info = g.get_object(id=post_id,
                                 fields=['comments.limit(0).summary(true)'])
    comments = comments_info['comments']['summary']['total_count']

    return likes, shares, comments
# Measure the relative share of a page's fans engaging with a post
def measure_engagement(post_id, total_fans):
    """Return the likes, shares, and comments on a post as percentages
    of the page's total number of fans.

    The raw counts come from ``measure_response`` so that both functions
    read the Graph API in the same way.

    Args:
        post_id: ID of the post to measure.
        total_fans: Number of fans of the page the post belongs to.

    Returns:
        A ``(likes_pct, shares_pct, comments_pct)`` tuple of percentages.

    Raises:
        ZeroDivisionError: If ``total_fans`` is 0.
    """
    likes, shares, comments = measure_response(post_id)
    likes_pct = likes / total_fans * 100.0
    shares_pct = shares / total_fans * 100.0
    comments_pct = comments / total_fans * 100.0
    return likes_pct, shares_pct, comments_pct
# For each artist, fetch 5 items from the page feed and print the reaction
# to each one, both as raw counts and relative to the page's total fans
artist_dict = {
    'Taylor Swift': taylor_swift_id,
    'Drake': drake_id,
    'Beyoncé': beyonce_id,
}

for name, page_id in artist_dict.items():
    print()
    print(name)
    print('------------')
    feed = retrieve_page_feed(page_id, 5)
    total_fans = get_total_fans(page_id)

    for i, post in enumerate(feed):
        message = get_post_message(post)[:30]
        post_id = post['id']
        likes, shares, comments = measure_response(post_id)
        likes_pct, shares_pct, comments_pct = measure_engagement(post_id, total_fans)

        print('{0} - {1}...'.format(i + 1, message))
        # One output line per metric: raw count followed by its percentage
        for template, count, pct in (
            (' Likes {0} ({1:7.5f}%)', likes, likes_pct),
            (' Shares {0} ({1:7.5f}%)', shares, shares_pct),
            (' Comments {0} ({1:7.5f}%)', comments, comments_pct),
        ):
            print(template.format(count, pct))
import pandas as pd # pip install pandas
# Columns of the Pandas DataFrame that holds the artist page
# feed information
columns = ['Name',
           'Total Fans',
           'Post Number',
           'Post Date',
           'Headline',
           'Likes',
           'Shares',
           'Comments',
           'Rel. Likes',
           'Rel. Shares',
           'Rel. Comments']

# Gather one record per post for the last 10 posts of each artist, along
# with the audience reaction. The records are collected in a plain list and
# the DataFrame is built once afterwards: DataFrame.append was removed in
# pandas 2.0, and appending row by row copies the whole frame every time.
records = []
for page_id in [taylor_swift_id, drake_id, beyonce_id]:
    name = g.get_object(id=page_id)['name']
    fans = get_total_fans(page_id)
    feed = retrieve_page_feed(page_id, 10)
    for i, post in enumerate(feed):
        likes, shares, comments = measure_response(post['id'])
        likes_pct, shares_pct, comments_pct = measure_engagement(post['id'], fans)
        records.append({'Name': name,
                        'Total Fans': fans,
                        'Post Number': i+1,
                        'Post Date': post['created_time'],
                        'Headline': get_post_message(post),
                        'Likes': likes,
                        'Shares': shares,
                        'Comments': comments,
                        'Rel. Likes': likes_pct,
                        'Rel. Shares': shares_pct,
                        'Rel. Comments': comments_pct,
                        })

# Passing columns= fixes the column order and keeps the columns present
# even when no records were collected
musicians = pd.DataFrame(records, columns=columns)

# Make sure the count columns are integer-typed
for col in ['Post Number', 'Total Fans', 'Likes', 'Shares', 'Comments']:
    musicians[col] = musicians[col].astype(int)

# Show a preview of the DataFrame
musicians.head()
import matplotlib # pip install matplotlib
%matplotlib inline
# Draw one bar chart per metric for Drake's posts, by post number
drake_posts = musicians[musicians['Name'] == 'Drake']
for metric in ('Likes', 'Shares', 'Comments',
               'Rel. Likes', 'Rel. Shares', 'Rel. Comments'):
    drake_posts.plot(x='Post Number', y=metric, kind='bar')
# Replace the index with a (Name, Post Number) multi-index
musicians = musicians.set_index(['Name', 'Post Number'])

# Unstacking level 0 pivots the artist names out of the index, so that
# each data column is grouped by artist
by_artist = musicians.unstack(level=0)
by_artist['Likes']

# Compare the reactions to each artist's last 10 Facebook posts
plot = by_artist['Likes'].plot(kind='bar', subplots=False, figsize=(10,5), width=0.8)
plot.set_xlabel('10 Latest Posts')
plot.set_ylabel('Number of Likes Received')

# Compare how engaged each artist's Facebook fan base was with the last 10 posts
plot = by_artist['Rel. Likes'].plot(kind='bar', subplots=False, figsize=(10,5), width=0.8)
plot.set_xlabel('10 Latest Posts')
plot.set_ylabel('Likes / Total Fans (%)')

# Per-artist averages of each relative metric
print('Average Likes / Total Fans')
print(by_artist['Rel. Likes'].mean())
print('\nAverage Shares / Total Fans')
print(by_artist['Rel. Shares'].mean())
print('\nAverage Comments / Total Fans')
print(by_artist['Rel. Comments'].mean())