65 lines
2.0 KiB
Python
65 lines
2.0 KiB
Python
#!/usr/bin/python3
|
|
|
|
# Clean up address data in JSON files by stripping leading and trailing whitespace from each line of the address.
|
|
|
|
import os
|
|
import json
|
|
|
|
# not currently used
|
|
def convert_year(date):
    """Convert a ``d/m/y`` date string to ``yyyy/m/d`` field order.

    A short year is expanded with a century pivot at 50: a year whose numeric
    value is 50 or more becomes 19xx, anything lower becomes 20xx.  A year
    that is already four or more characters long is kept unchanged rather
    than having a century prefixed to it.

    Args:
        date: Text with exactly three ``/``-separated fields, in
            day/month/year order.

    Returns:
        The three fields joined with ``/`` in year/month/day order.

    Raises:
        ValueError: If ``date`` does not split into exactly three fields, or
            the year field cannot be parsed as an integer.
    """
    day, month, year = date.split('/')
    year_value = int(year)  # also rejects a non-numeric year in every branch
    if len(year) >= 4:
        # Already carries its century; prefixing would give e.g. "191999".
        full_year = year
    else:
        full_year = '%d%s' % (19 if year_value >= 50 else 20, year)
    return "/".join([full_year, month, day])
|
|
|
|
|
|
def write_json_file(workspace, file, data):
    """Write ``data`` to a file as indented JSON with sorted keys.

    The target path is the plain string concatenation ``workspace + file``;
    no separator is inserted between the two.  The path, the raw data and the
    serialized text are each printed to stdout before the file is written.

    Args:
        workspace: Path prefix the file name is appended to.
        file: File name appended to ``workspace``.
        data: Object to serialize with ``json.dumps``.

    Raises:
        TypeError: If ``data`` contains something ``json.dumps`` cannot
            serialize.
        OSError: If the target path cannot be opened for writing.
    """
    path = workspace + file
    print(path)
    print(data)
    # Serialize before opening: mode "w" truncates the target immediately, so
    # a serialization failure after the open would leave an empty file behind.
    json_contents = json.dumps(data, sort_keys=True, indent=4)
    print(json_contents)
    with open(path, "w") as json_file:
        json_file.write(json_contents)
|
|
|
|
|
|
def documents(workspace, fileList):
    """Yield ``(file name, parsed JSON content)`` for each name in ``fileList``.

    Each file is opened at ``workspace + file`` (plain string concatenation)
    and parsed only when the generator reaches it.  Progress messages are
    printed to stdout as the generator advances.

    Args:
        workspace: Path prefix each file name is appended to.
        fileList: Sequence of file names to load; its length is printed first.

    Yields:
        A ``(file, contents)`` tuple, where ``contents`` is the value returned
        by ``json.load`` for that file.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    print("Entering documents function")
    print("length of fileList: " + str(len(fileList)))
    for index, file in enumerate(fileList):
        print(index, '--->', file)
        # Close each handle as soon as it is parsed instead of leaving it to
        # the garbage collector; the caller may rewrite the same file next.
        with open(workspace + file) as json_file:
            contents = json.load(json_file)
        yield file, contents
|
|
|
|
|
|
# workspace_home = '/var/tmp/lawsoc_new/'
|
|
# All paths hang off the directory the script is launched from.
workspace_home = os.getcwd()
# JSON output tree under the launch directory, and its "office" subdirectory.
# Both keep a trailing slash because file names are appended by concatenation.
workspace = "%s/json_out/" % workspace_home
office_workspace = "%soffice/" % workspace
|
|
|
|
|
|
if __name__ == '__main__':
    # Rewrite every file in the office workspace in place, with each entry of
    # its "address" value stripped of surrounding whitespace.
    office_fileList = os.listdir(office_workspace)

    for file, data in documents(office_workspace, office_fileList):
        print("File: ", file)

        # NOTE(review): presumably "address" is a list of strings, one per
        # address line -- confirm against the data; a bare string here would
        # be split into single characters by the map below.
        address = data["address"]
        print(type(address))

        # Strip once and reuse the result for both the echo and the update.
        new_address = list(map(str.strip, address))
        print(new_address)
        data["address"] = new_address

        write_json_file(office_workspace, file, data)
|