From e78c4df403e5f98afa08f7a0841ff233d5f6d05b Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Wed, 22 Jun 2022 01:42:41 +1000 Subject: Update backend READMEs, rename some files for consistency --- backend/data/eol/README.md | 33 ++++++++++++++++++++------------- backend/data/eol/reviewImgs.py | 2 +- 2 files changed, 21 insertions(+), 14 deletions(-) (limited to 'backend/data/eol') diff --git a/backend/data/eol/README.md b/backend/data/eol/README.md index 8338be0..fbb008d 100644 --- a/backend/data/eol/README.md +++ b/backend/data/eol/README.md @@ -1,18 +1,25 @@ -Downloaded Files -================ -- imagesList.tgz
- Obtained from https://opendata.eol.org/dataset/images-list on 24/04/2022. - Listed as being last updated on 05/02/2020. +This directory holds files obtained from/using the [Encyclopedia of Life](https://eol.org/). + +# Name Data Files - vernacularNames.csv
- Obtained from https://opendata.eol.org/dataset/vernacular-names on 24/04/2022. - Listed as being last updated on 27/10/2020. + Obtained from https://opendata.eol.org/dataset/vernacular-names on 24/04/2022 (last updated on 27/10/2020). + Contains alternative-name data from EOL. -Generated Files -=============== +# Image Metadata Files +- imagesList.tgz
+ Obtained from https://opendata.eol.org/dataset/images-list on 24/04/2022 (last updated on 05/02/2020). + Contains metadata for images from EOL. - imagesList/
- Obtained by extracting imagesList.tgz. + Extracted from imagesList.tgz. - imagesList.db
- Represents data from eol/imagesList/*, and is created by genImagesListDb.sh.
+ Contains data from imagesList/. + Created by running genImagesListDb.sh, which simply imports csv files into a database.
Tables:
- - images: - content_id INT PRIMARY KEY, page_id INT, source_url TEXT, copy_url TEXT, license TEXT, copyright_owner TEXT + - `images`: + `content_id INT PRIMARY KEY, page_id INT, source_url TEXT, copy_url TEXT, license TEXT, copyright_owner TEXT` + +# Image Generation Files +- downloadImgs.py
+ Used to download image files into imgsForReview/. +- reviewImgs.py
+ Used to review images in imgsForReview/, moving acceptable ones into imgs/. diff --git a/backend/data/eol/reviewImgs.py b/backend/data/eol/reviewImgs.py index 4fea1c4..5290f9e 100755 --- a/backend/data/eol/reviewImgs.py +++ b/backend/data/eol/reviewImgs.py @@ -17,7 +17,7 @@ if len(sys.argv) > 1: sys.exit(1) imgDir = "imgsForReview/" -outDir = "imgsReviewed/" +outDir = "imgs/" extraInfoDbCon = sqlite3.connect("../data.db") extraInfoDbCur = extraInfoDbCon.cursor() def getExtraInfo(eolId): -- cgit v1.2.3