diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-07-11 13:19:18 +1000 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-07-11 13:19:18 +1000 |
| commit | 7a28e15874796b3becf97c0193575d906d0cfd01 (patch) | |
| tree | 20c679fb7167c18009a697f0d3db7bed1d1b409c /backend/tolData/eol/genImagesListDb.py | |
| parent | 5fe71ea7b9d9a5d2dc6e8e5ce5b9193629eed74d (diff) | |
Update backend documentation
Diffstat (limited to 'backend/tolData/eol/genImagesListDb.py')
| -rwxr-xr-x | backend/tolData/eol/genImagesListDb.py | 36 |
1 files changed, 36 insertions, 0 deletions
#!/usr/bin/python3
"""Generate a sqlite db from a directory of CSV files holding EOL image data.

Each CSV row is expected to hold six fields, in order: content ID, page ID,
source URL, copy URL, license, and copyright owner. They are stored in an
`images` table keyed by content ID.
"""

import csv
import os
import re
import sqlite3
import sys

usageInfo = f"""
Usage: {sys.argv[0]}

Generates a sqlite db from a directory of CSV files holding EOL image data
"""

imagesListDir = "imagesList/"
dbFile = "imagesList.db"

# Number of fields every data row must have (one per column of `images`)
NUM_FIELDS = 6
# A row whose first field starts with an ASCII letter is a header line, not data
HEADER_REGEX = re.compile(r"^[a-zA-Z]")


def readImageRows(csvPath):
    """Yield one tuple per data row of the CSV file at csvPath.

    Each tuple is (contentId, pageId, sourceUrl, copyUrl, license, owner),
    with the two IDs converted to int. Header lines and blank lines are
    skipped.

    Raises:
        ValueError: if a row does not have exactly six fields, or an ID
            field is not an integer.
    """
    with open(csvPath, newline="") as csvFile:
        reader = csv.reader(csvFile)
        for row in reader:
            if not row:  # csv.reader yields [] for a blank line
                continue
            if len(row) != NUM_FIELDS:
                raise ValueError(
                    f"{csvPath}:{reader.line_num}: expected {NUM_FIELDS} fields, got {len(row)}")
            contentId, pageId, sourceUrl, copyUrl, licenseName, owner = row
            if HEADER_REGEX.match(contentId):  # Skip header line
                continue
            yield (int(contentId), int(pageId), sourceUrl, copyUrl, licenseName, owner)


def genImagesListDb(imagesDir, dbPath):
    """Create the `images` table in dbPath and fill it from the CSV files in imagesDir.

    Every entry of imagesDir is read as a CSV file, in sorted name order so
    that runs are reproducible. Changes are committed only after all files
    were read successfully; the connection is closed either way.

    Raises:
        sqlite3.Error: if the table already exists or an insert fails
            (for example on a duplicate content ID).
        ValueError: if a CSV row is malformed (see readImageRows).
    """
    print("Creating database")
    dbCon = sqlite3.connect(dbPath)
    try:
        dbCur = dbCon.cursor()
        dbCur.execute("CREATE TABLE images" \
            " (content_id INT PRIMARY KEY, page_id INT, source_url TEXT, copy_url TEXT, license TEXT, copyright_owner TEXT)")
        print("Reading CSV files")
        for filename in sorted(os.listdir(imagesDir)):
            csvPath = os.path.join(imagesDir, filename)
            print(f"Processing {csvPath}")
            # Rows are streamed from the generator, so a file is never held in memory whole
            dbCur.executemany("INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)",
                readImageRows(csvPath))
        print("Closing database")
        dbCon.commit()
    finally:
        # Runs on errors too, so the connection is never leaked
        dbCon.close()


def main():
    """Check the command line (no arguments are accepted) and build the database."""
    if len(sys.argv) > 1:
        print(usageInfo, file=sys.stderr)
        sys.exit(1)
    genImagesListDb(imagesListDir, dbFile)


if __name__ == "__main__":
    main()
