Project #8 - Data Portrait

Objective

Create a data portrait out of a personal habit of yours.

Submission

My data portrait is a simple representation of the types of content I saved onto the platform, Are.na, over the past 12 days.

Reflection

At the beginning of the exercise, I knew that I didn't want to manually create a spreadsheet or manually translate the data from CSV to JSON. Instead, I wrote a Python script to translate my raw data (my natural interactions with the platform, Are.na) into a spreadsheet in CSV format. I did this by accessing the Are.na API, scraping my saved content from Are.na, and then writing that content into a spreadsheet. The code for that is below:

# Program to create a CSV from an Are.na API scrape of my recent channel additions

from arena import Arena
import time

# API setup: authenticate against the Are.na API.
# The token below is a placeholder; substitute a real access token to run.
my_access_token = "REDACTED"
arena = Arena(my_access_token)
user = arena.users.user('z-ai')


# IDs of every block to scrape.
blocks = [
    "9679315", "9679246", "9651385", "9644289", "9633280", "9626461",
    "9625940", "9625054", "9621997", "9621939", "9621905", "9612164",
    "9609724", "9607341", "9602743", "9592700", "9592679", "9591470",
    "9587335", "9578941", "9566487", "9566485", "9566481", "9566455",
    "9552489", "9552471", "9552262", "9552260", "9550685", "9543588",
    "9546276",
]

# Parallel lists: index i of every list describes the same block.
days = []
block_ids = []
titles = []
images = []
block_types = []
sources = []
channels = []
channel_types = []


# Fetch each block and record one value per list.
for block in blocks:
    new_block = arena.blocks.block(block)

    block_id = new_block.id
    # Characters 8-9 of updated_at are kept as the day.
    # NOTE(review): assumes an ISO-8601 timestamp (YYYY-MM-DD...) - confirm.
    day = str(new_block.updated_at[8:10])
    block_type = str(new_block.base_class)

    # Only the block's first connection is recorded.
    channel_title = new_block.connections[0]["title"]
    channel = str(channel_title)
    # The channel type is the text before the first "]" once leading "["
    # characters are stripped, e.g. "[Type] Name" -> "Type". Taking the list
    # element directly avoids the repr quotes that str(list) would leave in
    # the value.
    channel_type = channel_title.strip("[").split("]")[0]

    # Title falls back to the block's content, then to "Untitled".
    # Truthiness covers both "" and None.
    if new_block.title:
        title = new_block.title
    elif new_block.content:
        title = str(new_block.content)
    else:
        title = "Untitled"

    source = str(new_block.source["url"]) if new_block.source else "No URL"
    if new_block.image:
        image = str(new_block.image["display"]["url"])
    else:
        image = "No Image"

    # Append this block's values to the parallel lists.
    days.append(day)
    block_ids.append(block_id)
    titles.append(title)
    images.append(image)
    block_types.append(block_type)
    sources.append(source)
    channels.append(channel)
    channel_types.append(channel_type)

    # Pause between requests so the API does not block the scrape.
    time.sleep(0.25)


import csv
import json


# Write the scraped data to arena.csv, one row per block.
# Column headers, in the same order as the parallel lists zipped below.
fieldnames = ['Day', 'ID', 'Title', 'Image', 'Block Type', 'Source', 'Channel', 'Channel Type']

# newline='' lets the csv module control line endings (otherwise blank rows
# can appear on Windows); UTF-8 matches the encoding make_json reads with.
with open('arena.csv', 'w', newline='', encoding='utf-8') as arena_file:
    arena_writer = csv.DictWriter(arena_file, fieldnames=fieldnames)
    arena_writer.writeheader()

    # Pair each header with the value at the same position in the row.
    for row_values in zip(days, block_ids, titles, images, block_types, sources, channels, channel_types):
        arena_writer.writerow(dict(zip(fieldnames, row_values)))

#csv to json
import csv
import json


# Convert an existing CSV file into a JSON file.
def make_json(csvFilePath, jsonFilePath, keyColumn='No'):
    """Convert a CSV file into a JSON object keyed by one of its columns.

    Each CSV row becomes a dictionary mapping header -> cell value, stored
    under that row's value in ``keyColumn``. When two rows share a key, the
    later row replaces the earlier one.

    Args:
        csvFilePath: Path of the existing CSV file to read (UTF-8).
        jsonFilePath: Path of the JSON file to create or overwrite (UTF-8).
        keyColumn: Header of the column whose values become the top-level
            JSON keys. Defaults to 'No'.

    Raises:
        KeyError: If the CSV header has no column named ``keyColumn``.
    """
    # Maps each row's key to the full row dictionary.
    data = {}

    # newline='' is what the csv module expects for files it parses.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        csvReader = csv.DictReader(csvf)
        for row in csvReader:
            data[row[keyColumn]] = row

    # Serialize the collected rows with 4-space indentation.
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(data, jsonf, indent=4)


# Input CSV and output JSON file paths.
csvFilePath = r'arena-updated-2.csv'
jsonFilePath = r'arena-4.json'

# Run the CSV-to-JSON conversion on those paths.
make_json(csvFilePath=csvFilePath, jsonFilePath=jsonFilePath)

This code produced the following spreadsheet:

This code also produced the following JSON file: