6 files changed, 31 insertions, 48 deletions
diff --git a/backend/tolData/enwiki/downloadImgLicenseInfo.py b/backend/tolData/enwiki/downloadImgLicenseInfo.py
index 399922e..dd39d54 100755
--- a/backend/tolData/enwiki/downloadImgLicenseInfo.py
+++ b/backend/tolData/enwiki/downloadImgLicenseInfo.py
@@ -5,19 +5,16 @@ import sqlite3, urllib.parse, html
 import requests
 import time, signal
 
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
+import argparse
+parser = argparse.ArgumentParser(description="""
 Reads image names from a database, and uses enwiki's online API to obtain
 licensing information for them, adding the info to the database.
 
 SIGINT causes the program to finish an ongoing download and exit.
 The program can be re-run to continue downloading, and looks
 at already-processed names to decide what to skip.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.parse_args()
 
 imgDb = "imgData.db"
 apiUrl = "https://en.wikipedia.org/w/api.php"
diff --git a/backend/tolData/enwiki/downloadImgs.py b/backend/tolData/enwiki/downloadImgs.py
index 8fb605f..520677f 100755
--- a/backend/tolData/enwiki/downloadImgs.py
+++ b/backend/tolData/enwiki/downloadImgs.py
@@ -5,19 +5,16 @@ import sqlite3
 import urllib.parse, requests
 import time, signal
 
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
+import argparse
+parser = argparse.ArgumentParser(description="""
 Downloads images from URLs in an image database, into an output directory,
 with names of the form 'pageId1.ext1'.
 
 SIGINT causes the program to finish an ongoing download and exit.
 The program can be re-run to continue downloading, and looks
 in the output directory do decide what to skip.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.parse_args()
 
 imgDb = "imgData.db" # About 130k image names
 outDir = "imgs"
diff --git a/backend/tolData/enwiki/genDescData.py b/backend/tolData/enwiki/genDescData.py
index b0ca272..0085d70 100755
--- a/backend/tolData/enwiki/genDescData.py
+++ b/backend/tolData/enwiki/genDescData.py
@@ -5,15 +5,12 @@ import bz2
 import html, mwxml, mwparserfromhell
 import sqlite3
 
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Reads through the wiki dump, and attempts to
-parse short-descriptions, and add them to a database.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
+import argparse
+parser = argparse.ArgumentParser(description="""
+Reads through the wiki dump, and attempts to parse short-descriptions,
+and add them to a database.
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.parse_args()
 
 dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2" # Had about 22e6 pages
 enwikiDb = "descData.db"
diff --git a/backend/tolData/enwiki/genDumpIndexDb.py b/backend/tolData/enwiki/genDumpIndexDb.py
index 3955885..1bffb27 100755
--- a/backend/tolData/enwiki/genDumpIndexDb.py
+++ b/backend/tolData/enwiki/genDumpIndexDb.py
@@ -4,14 +4,11 @@ import sys, os, re
 import bz2
 import sqlite3
 
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Adds data from the wiki dump index-file into a database.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
+import argparse
+parser = argparse.ArgumentParser(description="""
+Adds data from the wiki dump index-file into a database.
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.parse_args()
 
 indexFile = "enwiki-20220501-pages-articles-multistream-index.txt.bz2" # Had about 22e6 lines
 indexDb = "dumpIndex.db"
diff --git a/backend/tolData/enwiki/genImgData.py b/backend/tolData/enwiki/genImgData.py
index dedfe14..97e696f 100755
--- a/backend/tolData/enwiki/genImgData.py
+++ b/backend/tolData/enwiki/genImgData.py
@@ -4,18 +4,15 @@ import sys, re
 import bz2, html, urllib.parse
 import sqlite3
 
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
+import argparse
+parser = argparse.ArgumentParser(description="""
 For some set of page IDs, looks up their content in the wiki dump,
 and tries to parse infobox image names, storing them into a database.
 
 The program can be re-run with an updated set of page IDs, and
 will skip already-processed page IDs.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.parse_args()
 
 def getInputPageIds():
 	pageIds = set()
diff --git a/backend/tolData/enwiki/lookupPage.py b/backend/tolData/enwiki/lookupPage.py
index 1a90851..e7b95f0 100755
--- a/backend/tolData/enwiki/lookupPage.py
+++ b/backend/tolData/enwiki/lookupPage.py
@@ -4,19 +4,17 @@ import sys, re
 import bz2
 import sqlite3
 
-usageInfo = f"""
-Usage: {sys.argv[0]} title1
-
-Looks up a page with title title1 in the wiki dump, using
-the dump-index db, and prints the corresponding <page>.
-"""
-if len(sys.argv) != 2:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
+import argparse
+parser = argparse.ArgumentParser(description="""
+Looks up a page with the given title in the wiki dump, using the dump-index
+db, and prints the corresponding <page>.
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument("title", help="The title to look up")
+args = parser.parse_args()
 
 dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2"
 indexDb = "dumpIndex.db"
-pageTitle = sys.argv[1].replace("_", " ")
+pageTitle = args.title.replace("_", " ")
 
 print("Looking up offset in index db")
 dbCon = sqlite3.connect(indexDb)