about summary refs log tree commit diff
path: root/backend/tolData/eol/genImagesListDb.py
diff options
context:
space:
mode:
author Terry Truong <terry06890@gmail.com> 2022-07-11 13:19:18 +1000
committer Terry Truong <terry06890@gmail.com> 2022-07-11 13:19:18 +1000
commit 7a28e15874796b3becf97c0193575d906d0cfd01 (patch)
tree 20c679fb7167c18009a697f0d3db7bed1d1b409c /backend/tolData/eol/genImagesListDb.py
parent 5fe71ea7b9d9a5d2dc6e8e5ce5b9193629eed74d (diff)
Update backend documentation
Diffstat (limited to 'backend/tolData/eol/genImagesListDb.py')
-rwxr-xr-x backend/tolData/eol/genImagesListDb.py 36
1 files changed, 36 insertions, 0 deletions
diff --git a/backend/tolData/eol/genImagesListDb.py b/backend/tolData/eol/genImagesListDb.py
new file mode 100755
index 0000000..32df10a
--- /dev/null
+++ b/backend/tolData/eol/genImagesListDb.py
@@ -0,0 +1,47 @@
+#!/usr/bin/python3
+
+import sys, os, re
+import csv
+import sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Generates a sqlite db from a directory of CSV files holding EOL image data
+"""
+# The script takes no arguments, so any argument just prints the usage text
+if len(sys.argv) > 1:
+    print(usageInfo, file=sys.stderr)
+    sys.exit(1)
+
+imagesListDir = "imagesList/" # Directory holding the input CSV files
+dbFile = "imagesList.db" # Sqlite database file to generate
+headerRegex = re.compile(r"^[a-zA-Z]") # Matches a first field starting with a letter (a header, not an ID)
+
+print("Creating database")
+dbCon = sqlite3.connect(dbFile)
+try:
+    dbCur = dbCon.cursor()
+    # No IF NOT EXISTS: this raises if dbFile already contains an 'images' table
+    dbCur.execute("CREATE TABLE images" \
+        " (content_id INT PRIMARY KEY, page_id INT, source_url TEXT, copy_url TEXT, license TEXT, copyright_owner TEXT)")
+    print("Reading CSV files")
+    csvFilenames = sorted(os.listdir(imagesListDir)) # Sorted, as os.listdir() returns names in arbitrary order
+    for filename in csvFilenames:
+        print(f"Processing {imagesListDir}{filename}")
+        # newline="" lets the csv module handle line endings itself, including newlines inside quoted fields
+        # NOTE(review): the file is decoded with the locale's default encoding - confirm the CSVs match it
+        with open(imagesListDir + filename, newline="") as file:
+            for row in csv.reader(file):
+                if not row: # Skip blank lines, which csv.reader yields as empty lists
+                    continue
+                (contentId, pageId, sourceUrl, copyUrl, license, owner) = row
+                if headerRegex.match(contentId): # Skip header line
+                    continue
+                dbCur.execute("INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)",
+                    (int(contentId), int(pageId), sourceUrl, copyUrl, license, owner))
+    print("Closing database")
+    dbCon.commit()
+finally:
+    # Release the connection even if a file or row could not be processed (nothing is committed in that case)
+    dbCon.close()