about summary refs log tree commit diff
path: root/backend/tolData/eol/genImagesListDb.py
diff options
context:
space:
mode:
Diffstat (limited to 'backend/tolData/eol/genImagesListDb.py')
-rwxr-xr-x backend/tolData/eol/genImagesListDb.py 34
1 files changed, 0 insertions, 34 deletions
diff --git a/backend/tolData/eol/genImagesListDb.py b/backend/tolData/eol/genImagesListDb.py
deleted file mode 100755
index 808292d..0000000
--- a/backend/tolData/eol/genImagesListDb.py
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/python3
-# Builds the sqlite file imagesList.db (table "images", indexed on page_id) from the CSV files found in imagesList/
-import os, re
-import csv
-import sqlite3
-
-import argparse
-parser = argparse.ArgumentParser(description="""
-Generates a sqlite db from a directory of CSV files holding EOL image data
-""", formatter_class=argparse.RawDescriptionHelpFormatter)
-parser.parse_args()  # No options are defined and the result is discarded; this only provides -h/--help and rejects stray arguments
-
-imagesListDir = 'imagesList/'  # Input directory; the trailing slash matters because file paths are built by plain string concatenation
-dbFile = 'imagesList.db'  # Output database path, relative to the current working directory
-
-print('Creating database')
-dbCon = sqlite3.connect(dbFile)  # Opens the database file, creating it if it does not exist yet
-dbCur = dbCon.cursor()
-dbCur.execute('CREATE TABLE images' \
- ' (content_id INT PRIMARY KEY, page_id INT, source_url TEXT, copy_url TEXT, license TEXT, copyright_owner TEXT)')  # No IF NOT EXISTS: fails if the table is already present
-dbCur.execute('CREATE INDEX images_pid_idx ON images(page_id)')  # Index for lookups by page_id
-print('Reading CSV files')
-csvFilenames = os.listdir(imagesListDir)  # Every directory entry is treated as a CSV file; there is no extension filter
-for filename in csvFilenames:
- print(f'Processing {imagesListDir}{filename}')
- with open(imagesListDir + filename, newline='') as file:  # newline='' is what the csv module asks for on files passed to csv.reader
- for contentId, pageId, sourceUrl, copyUrl, license, owner in csv.reader(file):  # Each row must have exactly six fields, otherwise the unpacking raises ValueError
- if re.match(r'^[a-zA-Z]', contentId): # Skip header line (recognised by a first field that starts with a letter)
- continue
- dbCur.execute('INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)',
- (int(contentId), int(pageId), sourceUrl, copyUrl, license, owner))  # The two IDs are converted to int; the other fields are stored as read
-print('Closing database')
-dbCon.commit()  # The only commit in the script: inserted rows are persisted here, after all files were read
-dbCon.close()