diff options
| -rw-r--r-- | .gitignore | 3 | ||||
| -rw-r--r-- | backend/data/README.md | 6 | ||||
| -rwxr-xr-x | backend/data/genDbpData.py | 2 | ||||
| -rwxr-xr-x | backend/data/genEnwikiDescData.py | 2 |
4 files changed, 6 insertions, 7 deletions
@@ -23,8 +23,7 @@ /backend/data/genOtolNamesToKeep.txt /backend/data/genOtolDataPickedDups.txt /backend/data/genEolNameDataPickedIds.txt -/backend/data/dbpNamesToSkip.txt +/backend/data/genDescNamesToSkip.txt /backend/data/dbpPickedLabels.txt -/backend/data/genEnwikiDescNamesToSkip.txt /backend/data/genEnwikiDescTitlesToUse.txt /backend/data/mergedImgList.txt diff --git a/backend/data/README.md b/backend/data/README.md index 6ec629a..0845450 100644 --- a/backend/data/README.md +++ b/backend/data/README.md @@ -25,12 +25,12 @@ File Generation Process 1 Obtain data in dbpedia/, as specified in it's README. 2 Run genDbpData.py, which adds a 'descs' table to data.db, using data in dbpedia/dbpData.db, the 'nodes' table, and possibly - dbpNamesToSkip.txt and dbpPickedLabels.txt. + genDescNamesToSkip.txt and dbpPickedLabels.txt. 5 Supplementary Name/Description/Image Data 1 Obtain data in enwiki/, as specified in it's README. 2 Run genEnwikiDescData.py, which adds to the 'descs' table, using data in - enwiki/enwikiData.db, and the 'nodes' table. Also uses genEnwikiDesc*.txt - files for skipping/resolving some name-page associations. + enwiki/enwikiData.db, and the 'nodes' table. Also uses genDescNamesToSkip.txt and + genEnwikiDescTitlesToUse.txt for skipping/resolving some name-page associations. 3 Optionally run genEnwikiNameData.py, which adds to the 'names' table, using data in enwiki/enwikiData.db, and the 'names' and 'descs' tables. 4 In enwiki/, run getEnwikiImgData.py, which generates a list of diff --git a/backend/data/genDbpData.py b/backend/data/genDbpData.py index 747d0be..3755145 100755 --- a/backend/data/genDbpData.py +++ b/backend/data/genDbpData.py @@ -13,7 +13,7 @@ if len(sys.argv) > 1: sys.exit(1) dbpediaDb = "dbpedia/dbpData.db" -namesToSkipFile = "dbpNamesToSkip.txt" +namesToSkipFile = "genDescNamesToSkip.txt" pickedLabelsFile = "dbpPickedLabels.txt" dbFile = "data.db" diff --git a/backend/data/genEnwikiDescData.py b/backend/data/genEnwikiDescData.py index ce715d3..4445c3f 100755 --- a/backend/data/genEnwikiDescData.py +++ b/backend/data/genEnwikiDescData.py @@ -13,7 +13,7 @@ if len(sys.argv) > 1: enwikiDb = "enwiki/enwikiData.db" dbFile = "data.db" -namesToSkipFile = "genEnwikiDescNamesToSkip.txt" +namesToSkipFile = "genDescNamesToSkip.txt" titlesToUseFile = "genEnwikiDescTitlesToUse.txt" titleToUseRegex = re.compile(r"(.*) \(.*\)") |
