diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-06-22 01:42:41 +1000 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-06-22 09:39:44 +1000 |
| commit | e78c4df403e5f98afa08f7a0841ff233d5f6d05b (patch) | |
| tree | f13dbf91228550075644be9766b4546eb20f1e1f /backend/data/eol | |
| parent | ae1467d2ab35a03eb2d7bf3e5ca1cf4634b23443 (diff) | |
Update backend READMEs, rename some files for consistency
Diffstat (limited to 'backend/data/eol')
| -rw-r--r-- | backend/data/eol/README.md | 33 | ||||
| -rwxr-xr-x | backend/data/eol/reviewImgs.py | 2 |
2 files changed, 21 insertions, 14 deletions
```diff
diff --git a/backend/data/eol/README.md b/backend/data/eol/README.md
index 8338be0..fbb008d 100644
--- a/backend/data/eol/README.md
+++ b/backend/data/eol/README.md
@@ -1,18 +1,25 @@
-Downloaded Files
-================
-- imagesList.tgz <br>
-    Obtained from https://opendata.eol.org/dataset/images-list on 24/04/2022.
-    Listed as being last updated on 05/02/2020.
+This directory holds files obtained from/using the [Encyclopedia of Life](https://eol.org/).
+
+# Name Data Files
 - vernacularNames.csv <br>
-    Obtained from https://opendata.eol.org/dataset/vernacular-names on 24/04/2022.
-    Listed as being last updated on 27/10/2020.
+    Obtained from <https://opendata.eol.org/dataset/vernacular-names> on 24/04/2022 (last updated on 27/10/2020).
+    Contains alternative-name data from EOL.
 
-Generated Files
-===============
+# Image Metadata Files
+- imagesList.tgz <br>
+    Obtained from <https://opendata.eol.org/dataset/images-list> on 24/04/2022 (last updated on 05/02/2020).
+    Contains metadata for images from EOL.
 - imagesList/ <br>
-    Obtained by extracting imagesList.tgz.
+    Extracted from imagesList.tgz.
 - imagesList.db <br>
-    Represents data from eol/imagesList/*, and is created by genImagesListDb.sh. <br>
+    Contains data from imagesList/.
+    Created by running genImagesListDb.sh, which simply imports csv files into a database. <br>
     Tables: <br>
-    - images:
-        content_id INT PRIMARY KEY, page_id INT, source_url TEXT, copy_url TEXT, license TEXT, copyright_owner TEXT
+    - `images`:
+        `content_id INT PRIMARY KEY, page_id INT, source_url TEXT, copy_url TEXT, license TEXT, copyright_owner TEXT`
+
+# Image Generation Files
+- downloadImgs.py <br>
+    Used to download image files into imgsForReview/.
+- reviewImgs.py <br>
+    Used to review images in imgsForReview/, moving acceptable ones into imgs/.
```
diff --git a/backend/data/eol/reviewImgs.py b/backend/data/eol/reviewImgs.py index 4fea1c4..5290f9e 100755 --- a/backend/data/eol/reviewImgs.py +++ b/backend/data/eol/reviewImgs.py @@ -17,7 +17,7 @@ if len(sys.argv) > 1: sys.exit(1) imgDir = "imgsForReview/" -outDir = "imgsReviewed/" +outDir = "imgs/" extraInfoDbCon = sqlite3.connect("../data.db") extraInfoDbCur = extraInfoDbCon.cursor() def getExtraInfo(eolId): |
