64 lines
2.1 KiB
Python
64 lines
2.1 KiB
Python
|
|
#!/usr/bin/python3
|
||
|
|
|
||
|
|
import gzip
|
||
|
|
import shutil
|
||
|
|
from argparse import ArgumentParser
|
||
|
|
|
||
|
|
import requests, tempfile, os, pathlib
|
||
|
|
|
||
|
|
# Module-level cache filled by getSourceList(): maps a source ID (the
# "source" field of an API entry) to that entry. Empty until the first fetch.
sourceList = {}
|
||
|
|
|
||
|
|
def getSourceList():
    """Return the cached OpenAddresses source index, fetching it on first use.

    The listing is downloaded from the batch API, restricted to entries whose
    ``layer`` is ``"addresses"``, and reduced to one entry per ``source``:
    the one with the greatest ``updated`` value (the first one seen wins a
    tie).  The result is stored in the module-level ``sourceList`` dict, so
    later calls return it without issuing another request.

    Returns:
        dict: maps each source ID to its most recently updated entry.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    global sourceList
    if not sourceList:
        print("Fetching sources list")
        response = requests.get(
            "https://batch.openaddresses.io/api/data",
            # Without a timeout a stalled connection would hang forever.
            timeout=60,
        )
        # Fail here on an HTTP error instead of iterating an error payload
        # as if it were the source listing.
        response.raise_for_status()

        latest = {}
        for s in response.json():
            if s["layer"] != "addresses":
                continue
            known = latest.get(s["source"])
            if known is None or s["updated"] > known["updated"]:
                latest[s["source"]] = s

        # Publish only once the whole listing has been processed, so an
        # exception part-way through cannot leave a partial cache behind.
        # Updating in place keeps the identity of the module-level dict.
        sourceList.update(latest)
    return sourceList
|
||
|
|
|
||
|
|
|
||
|
|
def downloadSources(id, outfolder):
    """Download and decompress every source whose ID starts with ``id``.

    For each matching entry of ``getSourceList()`` the gzip-compressed
    GeoJSON of its job is downloaded and unpacked to
    ``<outfolder>/<source>-addresses-<name>.geojson``.  Sources whose output
    file already exists are skipped.

    Args:
        id: Source ID or ID prefix, e.g. ``"us/al/"``.  (The name shadows the
            builtin but is kept so existing keyword callers keep working.)
        outfolder: Directory under which the output files are written;
            missing sub-directories are created.

    Raises:
        requests.RequestException: if a download fails, times out, or the
            server answers with an HTTP error status.
        OSError: if the payload is not valid gzip data or a file cannot be
            written.
    """
    for s in getSourceList().values():
        if not s["source"].startswith(id):
            continue

        outfilename = outfolder + "/" + s["source"] + "-addresses-" + s["name"] + ".geojson"
        if os.path.isfile(outfilename):
            print("Skipping " + s["source"] + ", already on disk.")
            continue

        print("Downloading " + s["source"])
        url = "https://v2.openaddresses.io/batch-prod/job/" + str(s["job"]) + "/source.geojson.gz"

        # Decompress into a sibling ".part" file and rename it only on
        # success: a truncated file under the final name would be treated as
        # "already on disk" by the check above on the next run.
        partname = outfilename + ".part"
        try:
            # Keep one handle open for both writing and reading instead of
            # reopening a NamedTemporaryFile by name (which fails on Windows).
            with tempfile.TemporaryFile() as tf:
                with requests.get(url, stream=True, timeout=60) as gzdl:
                    # Stop on an HTTP error instead of saving an error page.
                    gzdl.raise_for_status()
                    for chunk in gzdl.iter_content(chunk_size=16 * 1024):
                        tf.write(chunk)

                pathlib.Path(os.path.dirname(outfilename)).mkdir(parents=True, exist_ok=True)

                tf.seek(0)
                with gzip.open(tf) as gzf, open(partname, "wb") as outf:
                    shutil.copyfileobj(gzf, outf)

            os.replace(partname, outfilename)
        finally:
            # After a successful rename the partial file no longer exists;
            # on any failure remove whatever was written so far.
            if os.path.exists(partname):
                os.remove(partname)
|
||
|
|
|
||
|
|
# Command-line interface: two required positional arguments, the source ID
# (or ID prefix) to fetch and the folder that receives the output files.
parser = ArgumentParser(description="Download address data from OpenAddresses.io")
parser.add_argument(
    "source",
    help=(
        "Source dataset ID, or partial ID. "
        "For example: us/al/ will download all Alabama datasets, "
        "us/mt/statewide will download the Montana statewide dataset."
    ),
)
parser.add_argument("outfolder", help="Output folder")
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Script entry point: read the two positional arguments and start the
    # download for every source matching the given ID prefix.
    cli_args = parser.parse_args()
    downloadSources(cli_args.source, cli_args.outfolder)
|