#!/usr/bin/python3
# Executable Python script, 100 lines, 3.6 KiB.
|
|
|
|
import os, json, traceback
|
|
from argparse import ArgumentParser
|
|
|
|
# Stop reading a file after this many rows, speeds up analysis of many/large address files
rowstocheck = 15000

# Result buckets filled by checkGeojson(); each holds the names of the files
# that landed in that category.
oklist = []             # files with no problem above the reporting thresholds
emptygeometrylist = []  # files with too many rows lacking coordinates
emptyaddresslist = []   # files with too many rows lacking number/street
nocitylist = []         # files with too many rows lacking both city and postcode
noziplist = []          # files with too many rows lacking a postcode
totallyemptylist = []   # files where (nearly) every row lacks a street address
|
|
|
|
def _checkRecord(data):
    """Classify one parsed GeoJSON feature.

    Returns a tuple of four booleans ``(noaddress, nogeometry, nocity, nozip)``.
    Absent keys are treated the same as empty values, so an incomplete
    feature is counted as missing data instead of raising.
    """
    properties = data.get("properties") or {}
    geometry = data.get("geometry") or {}
    coordinates = geometry.get("coordinates") or []

    noaddress = not properties.get("number") or not properties.get("street")
    # Fewer than two values, or a falsy value (None, "", 0), counts as no geometry.
    nogeometry = len(coordinates) < 2 or not coordinates[0] or not coordinates[1]
    nozip = not properties.get("postcode")
    # Flag missing city unless postal code exists, because city can probably be filled from that
    nocity = nozip and not properties.get("city")
    return noaddress, nogeometry, nocity, nozip


def checkGeojson(filename):
    """Scan a line-delimited GeoJSON file and record which data it lacks.

    At most ``rowstocheck`` non-blank rows are read. Each row is parsed as
    one JSON feature and checked for a street address (number and street),
    coordinates, city and postcode. Rows that cannot be parsed or inspected
    are reported with a traceback and still count towards the row total.

    After the scan one status line is printed (terminated by a carriage
    return so the next status overwrites it) and ``filename`` is appended to
    every module-level list whose threshold it exceeds:

    * ``emptygeometrylist``  - more than 25% of rows lack geometry
    * ``emptyaddresslist``   - more than 67% of rows lack number/street
    * ``nocitylist``         - more than 67% of rows lack city and postcode
    * ``noziplist``          - more than 75% of rows lack postcode
    * ``totallyemptylist``   - all but a handful of rows lack number/street,
      or the file contains no rows at all
    * ``oklist``             - none of the above

    Returns None.
    """
    linecount = 0
    okcount = 0
    emptygeometrycount = 0
    emptyaddresscount = 0
    emptycitycount = 0
    emptyzipcount = 0

    # GeoJSON is UTF-8; undecodable bytes are replaced so that one bad byte
    # does not abort the whole run. "with" closes the file on every path.
    with open(filename, 'r', encoding="utf-8", errors="replace") as filedata:
        for line in filedata:
            # Test the cap before counting so linecount is the number of
            # rows actually inspected (it is the denominator below).
            if linecount >= rowstocheck:
                break
            if not line.strip():
                continue  # blank lines are not records
            linecount += 1
            try:
                noaddress, nogeometry, nocity, nozip = _checkRecord(json.loads(line))
            except Exception:
                # Best effort per row: report it and keep scanning the file.
                traceback.print_exc()
                print("Error encountered while processing", filename, "at", line)
                continue
            # True counts as 1.
            emptyaddresscount += noaddress
            emptygeometrycount += nogeometry
            emptycitycount += nocity
            emptyzipcount += nozip
            if not (noaddress or nogeometry or nocity or nozip):
                okcount += 1

    print(filename, ": OK:", str(okcount), "Bad: geometry:", str(emptygeometrycount), "address:", str(emptyaddresscount), "city:", str(emptycitycount), "zip:", str(emptyzipcount), " ", end="\r", flush=True)

    if linecount == 0:
        # No rows means no ratios to compute; a file without any rows has
        # no street addresses by definition.
        totallyemptylist.append(filename)
        return

    # Allow a couple not-fully-empty addresses, otherwise some broken ones won't be reported.
    # The allowance is capped at half the rows so that short files are not
    # flagged merely for having fewer than ten rows.
    allowance = min(10, linecount // 2)

    verdicts = (
        (emptygeometrylist, emptygeometrycount / linecount > .25),
        (emptyaddresslist, emptyaddresscount / linecount > .67),
        (nocitylist, emptycitycount / linecount > .67),
        (noziplist, emptyzipcount / linecount > .75),
        (totallyemptylist, emptyaddresscount >= linecount - allowance),
    )
    bad = False
    for bucket, flagged in verdicts:
        if flagged:
            bucket.append(filename)
            bad = True
    if not bad:
        oklist.append(filename)
|
|
|
|
# Command-line interface: one or more positional paths, collected in "source".
parser = ArgumentParser(description="Check OpenAddresses GeoJSON files and report on any problems found.")
parser.add_argument("source", nargs='+', help="File(s) to check.")
|
|
|
|
if __name__ == "__main__":
    # Check every file named on the command line, then print the files
    # collected in each problem list under its own heading.
    args = parser.parse_args()
    print(f"Checking {len(args.source)} OpenAddresses data files.")
    for filename in args.source:
        checkGeojson(filename)
    # Step off the carriage-return status line before the report starts.
    print(" ")
    print()
    print("== Report ==")
    report = (
        (" Files missing geometry:", emptygeometrylist),
        (" Files missing street address:", emptyaddresslist),
        (" Files missing city:", nocitylist),
        (" Files missing postal code:", noziplist),
        (" Files missing all street addresses:", totallyemptylist),
    )
    for heading, flagged in report:
        print(heading)
        for filename in flagged:
            print(" ", filename)
|