about summary refs log tree commit diff
path: root/backend/tolData/enwiki
diff options
context:
space:
mode:
author    Terry Truong <terry06890@gmail.com>  2022-08-20 13:16:21 +1000
committer Terry Truong <terry06890@gmail.com>  2022-08-20 13:16:21 +1000
commit 930c12d33e1093f874a4beb4d6376621e464e8c0 (patch)
tree   381722fc3ab9ebda482cb18d29e1091458aa93da /backend/tolData/enwiki
parent 8144003565797f0d18645a416b95d4365bba5fdd (diff)
Use argparse in python scripts
Diffstat (limited to 'backend/tolData/enwiki')
-rwxr-xr-x  backend/tolData/enwiki/downloadImgLicenseInfo.py | 11
-rwxr-xr-x  backend/tolData/enwiki/downloadImgs.py           | 11
-rwxr-xr-x  backend/tolData/enwiki/genDescData.py            | 15
-rwxr-xr-x  backend/tolData/enwiki/genDumpIndexDb.py         | 13
-rwxr-xr-x  backend/tolData/enwiki/genImgData.py             | 11
-rwxr-xr-x  backend/tolData/enwiki/lookupPage.py             | 18
6 files changed, 31 insertions, 48 deletions
diff --git a/backend/tolData/enwiki/downloadImgLicenseInfo.py b/backend/tolData/enwiki/downloadImgLicenseInfo.py
index 399922e..dd39d54 100755
--- a/backend/tolData/enwiki/downloadImgLicenseInfo.py
+++ b/backend/tolData/enwiki/downloadImgLicenseInfo.py
@@ -5,19 +5,16 @@ import sqlite3, urllib.parse, html
import requests
import time, signal
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
+import argparse
+parser = argparse.ArgumentParser(description="""
Reads image names from a database, and uses enwiki's online API to obtain
licensing information for them, adding the info to the database.
SIGINT causes the program to finish an ongoing download and exit.
The program can be re-run to continue downloading, and looks
at already-processed names to decide what to skip.
-"""
-if len(sys.argv) > 1:
- print(usageInfo, file=sys.stderr)
- sys.exit(1)
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.parse_args()
imgDb = "imgData.db"
apiUrl = "https://en.wikipedia.org/w/api.php"
diff --git a/backend/tolData/enwiki/downloadImgs.py b/backend/tolData/enwiki/downloadImgs.py
index 8fb605f..520677f 100755
--- a/backend/tolData/enwiki/downloadImgs.py
+++ b/backend/tolData/enwiki/downloadImgs.py
@@ -5,19 +5,16 @@ import sqlite3
import urllib.parse, requests
import time, signal
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
+import argparse
+parser = argparse.ArgumentParser(description="""
Downloads images from URLs in an image database, into an output directory,
with names of the form 'pageId1.ext1'.
SIGINT causes the program to finish an ongoing download and exit.
The program can be re-run to continue downloading, and looks
in the output directory do decide what to skip.
-"""
-if len(sys.argv) > 1:
- print(usageInfo, file=sys.stderr)
- sys.exit(1)
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.parse_args()
imgDb = "imgData.db" # About 130k image names
outDir = "imgs"
diff --git a/backend/tolData/enwiki/genDescData.py b/backend/tolData/enwiki/genDescData.py
index b0ca272..0085d70 100755
--- a/backend/tolData/enwiki/genDescData.py
+++ b/backend/tolData/enwiki/genDescData.py
@@ -5,15 +5,12 @@ import bz2
import html, mwxml, mwparserfromhell
import sqlite3
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Reads through the wiki dump, and attempts to
-parse short-descriptions, and add them to a database.
-"""
-if len(sys.argv) > 1:
- print(usageInfo, file=sys.stderr)
- sys.exit(1)
+import argparse
+parser = argparse.ArgumentParser(description="""
+Reads through the wiki dump, and attempts to parse short-descriptions,
+and add them to a database
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.parse_args()
dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2" # Had about 22e6 pages
enwikiDb = "descData.db"
diff --git a/backend/tolData/enwiki/genDumpIndexDb.py b/backend/tolData/enwiki/genDumpIndexDb.py
index 3955885..1bffb27 100755
--- a/backend/tolData/enwiki/genDumpIndexDb.py
+++ b/backend/tolData/enwiki/genDumpIndexDb.py
@@ -4,14 +4,11 @@ import sys, os, re
import bz2
import sqlite3
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Adds data from the wiki dump index-file into a database.
-"""
-if len(sys.argv) > 1:
- print(usageInfo, file=sys.stderr)
- sys.exit(1)
+import argparse
+parser = argparse.ArgumentParser(description="""
+Adds data from the wiki dump index-file into a database
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.parse_args()
indexFile = "enwiki-20220501-pages-articles-multistream-index.txt.bz2" # Had about 22e6 lines
indexDb = "dumpIndex.db"
diff --git a/backend/tolData/enwiki/genImgData.py b/backend/tolData/enwiki/genImgData.py
index dedfe14..97e696f 100755
--- a/backend/tolData/enwiki/genImgData.py
+++ b/backend/tolData/enwiki/genImgData.py
@@ -4,18 +4,15 @@ import sys, re
import bz2, html, urllib.parse
import sqlite3
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
+import argparse
+parser = argparse.ArgumentParser(description="""
For some set of page IDs, looks up their content in the wiki dump,
and tries to parse infobox image names, storing them into a database.
The program can be re-run with an updated set of page IDs, and
will skip already-processed page IDs.
-"""
-if len(sys.argv) > 1:
- print(usageInfo, file=sys.stderr)
- sys.exit(1)
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.parse_args()
def getInputPageIds():
pageIds = set()
diff --git a/backend/tolData/enwiki/lookupPage.py b/backend/tolData/enwiki/lookupPage.py
index 1a90851..e7b95f0 100755
--- a/backend/tolData/enwiki/lookupPage.py
+++ b/backend/tolData/enwiki/lookupPage.py
@@ -4,19 +4,17 @@ import sys, re
import bz2
import sqlite3
-usageInfo = f"""
-Usage: {sys.argv[0]} title1
-
-Looks up a page with title title1 in the wiki dump, using
-the dump-index db, and prints the corresponding <page>.
-"""
-if len(sys.argv) != 2:
- print(usageInfo, file=sys.stderr)
- sys.exit(1)
+import argparse
+parser = argparse.ArgumentParser(description="""
+Looks up a page with title title1 in the wiki dump, using the dump-index
+db, and prints the corresponding <page>.
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument("title", help="The title to look up")
+args = parser.parse_args()
dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2"
indexDb = "dumpIndex.db"
-pageTitle = sys.argv[1].replace("_", " ")
+pageTitle = args.title.replace("_", " ")
print("Looking up offset in index db")
dbCon = sqlite3.connect(indexDb)