AddressDatabase/downloadoa.py
2025-11-15 19:51:14 -07:00

64 lines
2.1 KiB
Python
Executable File

#!/usr/bin/python3
import gzip
import shutil
from argparse import ArgumentParser
import requests, tempfile, os, pathlib
# Module-level cache: source ID -> newest catalog record for that source.
sourceList = {}


def getSourceList():
    """Fetch and cache the OpenAddresses source catalog.

    On first call, queries the batch API, keeps only entries whose layer
    is "addresses", and retains the most recently updated record per
    source ID. Subsequent calls return the cached dict unchanged.

    Returns:
        dict: mapping of source ID to its newest catalog record.
    """
    global sourceList
    if not sourceList:
        print("Fetching sources list")
        catalog = requests.get(
            "https://batch.openaddresses.io/api/data"
        ).json()
        for entry in catalog:
            if entry["layer"] != "addresses":
                continue
            current = sourceList.get(entry["source"])
            # Keep whichever record carries the later "updated" stamp.
            if current is None or entry["updated"] > current["updated"]:
                sourceList[entry["source"]] = entry
    return sourceList
def downloadSources(id, outfolder):
    """Download every OpenAddresses dataset whose source ID starts with *id*.

    Each matching dataset is streamed as a gzipped GeoJSON from the batch
    job endpoint, decompressed, and written to
    ``<outfolder>/<source>-addresses-<name>.geojson``. Datasets already
    present on disk are skipped.

    Args:
        id: Source ID or prefix, e.g. "us/al/" or "us/mt/statewide".
        outfolder: Root output folder; subfolders are created as needed.

    Raises:
        requests.HTTPError: if a download request fails.
    """
    # Hoisted: the original called getSourceList() twice per iteration.
    sources = getSourceList()
    for sourceName in sources:
        s = sources[sourceName]
        if not s["source"].startswith(id):
            continue
        outfilename = outfolder + "/" + s["source"] + "-addresses-" + s["name"] + ".geojson"
        outfoldername = os.path.dirname(outfilename)
        if os.path.isfile(outfilename):
            print("Skipping " + s["source"] + ", already on disk.")
            continue
        print("Downloading " + s["source"])
        gzdl = requests.get(
            "https://v2.openaddresses.io/batch-prod/job/" + str(s["job"]) + "/source.geojson.gz",
            stream=True,
        )
        # Fail loudly on HTTP errors instead of gunzipping an error page.
        gzdl.raise_for_status()
        # Spool to an anonymous temp file and decompress from the open
        # file object. The original re-opened NamedTemporaryFile by name,
        # which fails on Windows; TemporaryFile also guarantees cleanup.
        with tempfile.TemporaryFile() as tmp:
            for chunk in gzdl.iter_content(chunk_size=16 * 1024):
                tmp.write(chunk)
            tmp.seek(0)
            pathlib.Path(outfoldername).mkdir(parents=True, exist_ok=True)
            with gzip.open(tmp) as gzf, open(outfilename, 'wb') as outf:
                shutil.copyfileobj(gzf, outf)
# Command-line interface: a source ID (or prefix) and an output folder.
parser = ArgumentParser(description="Download address data from OpenAddresses.io")
parser.add_argument("source", help="Source dataset ID, or partial ID. For example: us/al/ will download all Alabama datasets, us/mt/statewide will download the Montana statewide dataset.")
parser.add_argument("outfolder", help="Output folder")
if __name__ == "__main__":
    # Parse CLI arguments and kick off the download run.
    cli_args = parser.parse_args()
    downloadSources(cli_args.source, cli_args.outfolder)