ActivityPub Extractor

  • Extract post data from an ActivityPub archive and create a standalone webpage.
  • NOTE: This requires Python 3.
  • If you use this, you take all responsibility.

#!/usr/bin/python3

#
#  A quick and dirty Python utility to extract post data
#     from an ActivityPub archive.
#  Creates a standalone HTML web page with the extracted data.
#
#  NOTE: DOES NOT DO ANYTHING ABOUT IMAGES
#  
#  This is provided as is, with no warranty of safety or
#     even usability.
#  You are responsible if you choose to use this.
#
#  There is no copyright.
#

import json
import pprint


# FIELDS IN ARCHIVE TO EXTRACT
# Keys looked up on each post, in the order they are written to the page.
INTERESTING_FIELDS = ['published', 'content']

#==== THINGS YOU SHOULD CHANGE ====
# PATH TO ACTIVITYPUB OUTBOX
PATH_TO_OUTBOX = 'outbox.json'
# AN HTML PAGE WILL BE SAVED HERE
# (Fixed: a doubled opening quote here used to make the file unparseable.)
PATH_TO_HTML_PAGE = 'my_stuff.html'


#==== CODE ====

def read_outbox(filename: str):
    """Load an outbox file and return its parsed JSON content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode as UTF-8 explicitly. Without it, open() falls back to the
    # platform's locale encoding, which can mangle or reject non-ASCII
    # post text (accents, emoji) on systems where that is not UTF-8.
    with open(filename, encoding='utf-8') as f_in:
        return json.load(f_in)

def get_posted_items(outbox_dict):
    """Return whatever is stored under the ``'orderedItems'`` key.

    The lookup is a plain subscript, so a dict without that key raises
    ``KeyError``.
    """
    posted_items = outbox_dict['orderedItems']
    return posted_items

def handle_interesting_fields(post, fields=None):
    """Render the selected fields of one post as an HTML fragment.

    Args:
        post: The ``object`` value of an outbox activity. Normally a dict.
            Anything else yields no fields. Some activities may carry a
            bare URL string here instead (presumably boosts; verify
            against your archive).
        fields: Keys to pull from ``post``, in output order. When omitted,
            the module-level ``INTERESTING_FIELDS`` list is used.

    Returns:
        A string with each available field value followed by ``<br>`` and
        a newline, terminated by a single ``<hr>``. If nothing could be
        extracted the result is just ``<hr>``.
    """
    if fields is None:
        fields = INTERESTING_FIELDS

    pieces = []
    # Only index into real mappings. On a string, ``field in post`` is a
    # substring test, so a URL containing e.g. "content" would pass the
    # check and then fail with TypeError on ``post[field]``.
    if isinstance(post, dict):
        for field in fields:
            value = post.get(field)
            if value is None:
                # Missing key or explicit null: nothing to show.
                continue
            # str() keeps a non-string value from breaking the
            # concatenation.
            pieces.append(str(value) + '<br>\n')
    pieces.append('<hr>')
    return ''.join(pieces)

def extract_data(posts):
    """Build the HTML body for every entry in ``posts`` that has an object.

    Entries without an ``'object'`` key are skipped. For the rest, the
    value under that key is passed to ``handle_interesting_fields`` and
    the returned fragments are concatenated in input order.
    """
    fragments = (
        handle_interesting_fields(post=entry['object'])
        for entry in posts
        if 'object' in entry
    )
    return ''.join(fragments)
            
    
def create_html_page(html_body):
    """Wrap ``html_body`` in a minimal HTML document and return the text.

    The document has a fixed title of "Page Title". The indentation
    inside the two literals below is part of the emitted text, so it is
    kept exactly as is.
    """
    page_head = '''<!DOCTYPE html>
            <html>
            <head>
            <title>Page Title</title>
            </head>
            <body>
            '''
    page_tail = '''</body>
                     </html>
                    '''
    # The body goes directly between the opening boilerplate and </body>.
    return ''.join((page_head, html_body, page_tail))

def save_html_page(html_page, path=None):
    """Write the finished HTML document to disk, replacing any old file.

    Args:
        html_page: Complete HTML text to write.
        path: Destination file. When omitted, the module-level
            ``PATH_TO_HTML_PAGE`` is used.

    Raises:
        OSError: If the destination cannot be opened for writing.
    """
    if path is None:
        path = PATH_TO_HTML_PAGE
    # Write UTF-8 explicitly: the locale default on some platforms cannot
    # encode emoji and other non-ASCII text, and would raise
    # UnicodeEncodeError. The ``with`` block closes the file, so no
    # explicit close() is needed.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(html_page)
    
if __name__ == "__main__":
    # Pipeline: load the outbox, pick out its posted items, render them
    # as an HTML body, wrap that in a page, and write the page to disk.
    outbox = read_outbox(PATH_TO_OUTBOX)
    body_markup = extract_data(get_posted_items(outbox))
    save_html_page(create_html_page(body_markup))
    print('*** DONE ***')

This site does not track your information.