lawsoc-scraper/convert_json_date.py

66 lines
2.1 KiB
Python

#!/usr/bin/python3
# Rewrite the admitted_date field of every person JSON file from d/m/y to year/month/day.
import os
import json
def convert_year(date):
    """Convert a ``d/m/y`` date string to ``yyyy/m/d``.

    Years of one or two digits are expanded around a pivot of 50: ``50``-``99``
    become ``1950``-``1999`` and ``0``-``49`` become ``2000``-``2049``.  Years
    longer than two digits are kept as written.  A string whose first field is
    already a four-digit year is returned in the same order, so converting the
    same value twice is harmless.

    Args:
        date: Slash-separated date string with exactly three fields.

    Returns:
        The date as ``year/month/day``; month and day are copied verbatim.

    Raises:
        ValueError: If ``date`` does not have exactly three ``/``-separated
            fields, or a short year is not numeric.
    """
    first, month, last = date.strip().split('/')

    # A day can never be four digits, so a four-digit first field means the
    # value is already in year/month/day order; do not swap it again.
    if len(first) == 4 and first.isdigit():
        return "/".join([first, month, last])

    day, year = first, last
    if len(year) <= 2:
        # Only short years need a century; zero-pad so "5" becomes "2005".
        century = 19 if int(year) >= 50 else 20
        year = '%d%02d' % (century, int(year))
    return "/".join([year, month, day])
def write_json_file(workspace, file, data):
    """Write ``data`` to ``workspace + file`` as indented JSON with sorted keys.

    The target path, the raw data and the serialized text are echoed to stdout.

    Args:
        workspace: Directory prefix; it is concatenated directly with ``file``,
            so it must end with a path separator.
        file: Name of the file inside ``workspace``.
        data: JSON-serializable object to store.

    Raises:
        TypeError: If ``data`` cannot be serialized to JSON.
        OSError: If the target file cannot be opened for writing.
    """
    target_path = workspace + file
    print(target_path)
    print(data)
    # Serialize before opening: mode "w" truncates immediately, so a failure in
    # json.dumps after the open would leave an existing file empty.
    json_contents = json.dumps(data, sort_keys=True, indent=4)
    print(json_contents)
    with open(target_path, "w") as jsonFile:
        jsonFile.write(json_contents)
def documents(workspace, fileList):
    """Yield ``(file name, parsed JSON content)`` for every file in ``fileList``.

    Progress information is printed to stdout as each file is visited.

    Args:
        workspace: Directory prefix; it is concatenated directly with each file
            name, so it must end with a path separator.
        fileList: Sized collection of file names located in ``workspace``.

    Yields:
        Tuples of the file name and the object decoded from its JSON content.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    print("Entering documents function")
    print("length of fileList: " + str(len(fileList)))
    for index, file in enumerate(fileList):
        print(index, '--->', file)
        # Close the handle before yielding so the consumer can safely rewrite
        # the same file while this generator is suspended.
        with open(workspace + file) as json_file:
            contents = json.load(json_file)
        yield file, contents
# workspace_home = '/var/tmp/lawsoc_new/'
# All paths are resolved relative to the directory the script is started from.
workspace_home = os.getcwd()
# Output tree, and the sub-directory holding the per-person JSON files; both
# keep a trailing "/" because file names are appended by plain concatenation.
workspace = "%s/json_out/" % workspace_home
person_workspace = "%sperson/" % workspace
if __name__ == '__main__':
    # Rewrite every file found in the person workspace in place.
    person_fileList = os.listdir(person_workspace)
    for file, data in documents(person_workspace, person_fileList):
        tmp = data["admitted_date"]
        # Test truthiness rather than len(): empty dates are stored as null by
        # this script, and len(None) would raise TypeError on a later run.
        data["admitted_date"] = convert_year(tmp) if tmp else None
        write_json_file(person_workspace, file, data)