ActivityPub Extractor
- Extract post data from an ActivityPub archive and create a standalone webpage.
- NOTE: This requires Python3.
- If you use this, you take all responsibility.
#!/usr/bin/python3
#
# A quick and dirty Python utility to extract post data
# from an ActivityPub archive.
# Creates a standalone HTML web page with the extracted data.
#
# NOTE: DOES NOT DO ANYTHING ABOUT IMAGES
#
# This is provided as is, with no warranty and no guarantee of
# safety or even usability.
# You are responsible if you choose to use this.
#
# There is no copyright.
#
import json
import pprint
# FIELDS IN ARCHIVE TO EXTRACT
# Keys looked up on each post's 'object'; their values are written to the
# page in this order.
INTERESTING_FIELDS = ['published', 'content']

#==== THINGS YOU SHOULD CHANGE ====
# PATH TO ACTIVITYPUB OUTBOX
PATH_TO_OUTBOX = 'outbox.json'
# AN HTML PAGE WILL BE SAVED HERE
PATH_TO_HTML_PAGE = 'my_stuff.html'
#==== CODE ====
def read_outbox(filename: str):
    """Load and parse an ActivityPub outbox JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # (e.g. cp1252 on Windows), which would mangle or reject non-ASCII posts.
    with open(filename, encoding='utf-8') as f_in:
        return json.load(f_in)
def get_posted_items(outbox_dict):
    """Return the value stored under the outbox's 'orderedItems' key.

    Args:
        outbox_dict: The parsed outbox document.

    Returns:
        Whatever ``outbox_dict['orderedItems']`` holds (the lookup raises
        ``KeyError`` when a dict has no such key).
    """
    items = outbox_dict['orderedItems']
    return items
def handle_interesting_fields(post, fields=None):
    """Render the selected fields of one post object as an HTML fragment.

    Args:
        post: The post object, expected to be a dict. Anything else (for
            example a bare URL string) has no fields to show.
        fields: Keys to extract, in output order. Defaults to the
            module-level ``INTERESTING_FIELDS``.

    Returns:
        Each present field value followed by ``'<br>\\n'``, terminated by a
        single ``'<hr>'``; an empty string when ``post`` is not a dict.
    """
    if fields is None:
        fields = INTERESTING_FIELDS
    # On a string, `field in post` would be a substring test and
    # `post[field]` would raise TypeError, so bail out early.
    if not isinstance(post, dict):
        return ''
    parts = []
    for field in fields:
        value = post.get(field)
        # A JSON null carries nothing to display and cannot be concatenated.
        if value is None:
            continue
        # Values are inserted verbatim (not HTML-escaped); presumably
        # 'content' is already HTML -- verify before adding other fields.
        parts.append(str(value) + '<br>\n')
    parts.append('<hr>')
    return ''.join(parts)
def extract_data(posts):
    """Build the HTML body for all posts that embed an object.

    Args:
        posts: Iterable of outbox items (dicts).

    Returns:
        The concatenated output of ``handle_interesting_fields`` for every
        item whose ``'object'`` is a dict; an empty string if there are none.
    """
    fragments = []
    for post in posts:
        if not isinstance(post, dict):
            continue
        post_object = post.get('object')
        # ActivityStreams allows 'object' to be a bare IRI string rather
        # than an embedded object; such items have no fields to extract.
        if isinstance(post_object, dict):
            fragments.append(handle_interesting_fields(post=post_object))
    # Join once rather than growing a string on every iteration.
    return ''.join(fragments)
def create_html_page(html_body):
    """Wrap an HTML fragment in a minimal standalone document.

    Args:
        html_body: Markup placed between ``<body>`` and ``</body>``.

    Returns:
        The complete page as a single string.
    """
    page_head = (
        '<!DOCTYPE html>\n'
        '<html>\n'
        '<head>\n'
        '<title>Page Title</title>\n'
        '</head>\n'
        '<body>\n'
    )
    page_tail = '</body>\n</html>\n'
    return page_head + html_body + page_tail
def save_html_page(html_page, path=None):
    """Write the finished HTML page to disk.

    Args:
        html_page: Complete HTML document as a string.
        path: Destination file. Defaults to the module-level
            ``PATH_TO_HTML_PAGE``.

    Raises:
        OSError: If the destination cannot be opened for writing.
    """
    if path is None:
        path = PATH_TO_HTML_PAGE
    # The page declares no charset, so write pure ASCII and turn every other
    # character into a numeric character reference (&#NNN;). That renders
    # correctly whatever encoding the browser guesses, and never raises
    # UnicodeEncodeError on emoji the way a platform-default codec can.
    with open(path, 'w', encoding='ascii', errors='xmlcharrefreplace') as f:
        f.write(html_page)
    # No explicit close(): the with-statement already closes the file.
if __name__ == "__main__":
    # Pipeline: load the outbox, pull out its items, render them to an HTML
    # body, wrap that in a full page, and save the page to disk.
    outbox = read_outbox(PATH_TO_OUTBOX)
    items = get_posted_items(outbox)
    page = create_html_page(extract_data(items))
    save_html_page(page)
    print('*** DONE ***')