about | summary | refs | log | tree | commit | diff
path: root/backend/tolData/enwiki/genImgData.py
diff options
context:
space:
mode:
Diffstat (limited to 'backend/tolData/enwiki/genImgData.py')
-rwxr-xr-x backend/tolData/enwiki/genImgData.py 11
1 files changed, 4 insertions, 7 deletions
diff --git a/backend/tolData/enwiki/genImgData.py b/backend/tolData/enwiki/genImgData.py
index dedfe14..97e696f 100755
--- a/backend/tolData/enwiki/genImgData.py
+++ b/backend/tolData/enwiki/genImgData.py
@@ -4,18 +4,15 @@ import sys, re
import bz2, html, urllib.parse
import sqlite3
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
+import argparse
+parser = argparse.ArgumentParser(description="""
For some set of page IDs, looks up their content in the wiki dump,
and tries to parse infobox image names, storing them into a database.
The program can be re-run with an updated set of page IDs, and
will skip already-processed page IDs.
-"""
-if len(sys.argv) > 1:
- print(usageInfo, file=sys.stderr)
- sys.exit(1)
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.parse_args()
def getInputPageIds():
pageIds = set()