Make it exit more cleanly on Ctrl-C

This commit is contained in:
Skylar Ittner 2025-11-28 23:39:56 -07:00
parent 8cd7e5c6f6
commit b37313adb4

11
main.py
View File

@@ -130,6 +130,8 @@ def processOwnChunk(chunk, chunkcount, outfilename, ignorestates, keeponlystates
data.append([addr['number'], addr['street'], addr['unit'], addr['city'], addr['state'], addr['zip'], addr['plus4'], addr['latitude'], addr['longitude'], row.source]) data.append([addr['number'], addr['street'], addr['unit'], addr['city'], addr['state'], addr['zip'], addr['plus4'], addr['latitude'], addr['longitude'], row.source])
except ValidationException as e: except ValidationException as e:
badcount = badcount + 1 badcount = badcount + 1
except KeyboardInterrupt:
os._exit(0)
except Exception as e: except Exception as e:
print("W: Couldn't ingest address:") print("W: Couldn't ingest address:")
print(row) print(row)
@@ -152,6 +154,7 @@ def importOwnFile(filename, outfilename, ignorestates, keeponlystates):
chunksize = 1000 chunksize = 1000
in_flight = set() in_flight = set()
with concurrent.futures.ProcessPoolExecutor(max_workers=maxthreads, max_tasks_per_child=100, initializer=init_worker, initargs=(cfg,)) as executor: with concurrent.futures.ProcessPoolExecutor(max_workers=maxthreads, max_tasks_per_child=100, initializer=init_worker, initargs=(cfg,)) as executor:
try:
for chunk in pd.read_csv(file, chunksize=chunksize, usecols=columns, keep_default_na=False, dtype={ for chunk in pd.read_csv(file, chunksize=chunksize, usecols=columns, keep_default_na=False, dtype={
"number":"string","street":"string", "number":"string","street":"string",
"street2":"string","city":"string", "street2":"string","city":"string",
@@ -170,6 +173,10 @@ def importOwnFile(filename, outfilename, ignorestates, keeponlystates):
for fut in concurrent.futures.as_completed(in_flight): for fut in concurrent.futures.as_completed(in_flight):
fut.result() fut.result()
except KeyboardInterrupt:
print("\nCtrl-C, exiting!")
executor.shutdown(cancel_futures=True)
sys.exit(0)
print("\nDone processing! Parsed " + str(chunkcount) + " chunks.") print("\nDone processing! Parsed " + str(chunkcount) + " chunks.")
print("There were " + str(badcount) + " unprocessable addresses.") print("There were " + str(badcount) + " unprocessable addresses.")
@@ -647,6 +654,7 @@ def tosqlite(addressfile, dbfile):
return rowschanged return rowschanged
if __name__ == "__main__": if __name__ == "__main__":
try:
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Tools to build a standardized U.S. address database from free source data." description="Tools to build a standardized U.S. address database from free source data."
) )
@@ -798,3 +806,6 @@ if __name__ == "__main__":
source = args.source source = args.source
for file in args.file: for file in args.file:
importOpenAddressFile(file, outputfile, statesToIgnore, source, args.state, zipprefix) importOpenAddressFile(file, outputfile, statesToIgnore, source, args.state, zipprefix)
except KeyboardInterrupt:
os._exit(0)