diff options
Diffstat (limited to 'backend/data')
| -rw-r--r-- | backend/data/README.md | 6 | ++---- |
| -rwxr-xr-x | backend/data/genEnwikiNameData.py | 2 | +- |
| -rwxr-xr-x | backend/data/genEolNameData.py | 4 | ++-- |
3 files changed, 5 insertions, 7 deletions
diff --git a/backend/data/README.md b/backend/data/README.md index 19005e5..0bde721 100644 --- a/backend/data/README.md +++ b/backend/data/README.md @@ -49,21 +49,19 @@ File Generation Process 8 Run genLinkedImgs.py to add a 'linked_imgs' table to data.db, which uses 'nodes', 'edges', 'eol\_ids', and 'node_imgs', to associate nodes without images to child images. - 5 Reduced Tree Structure Data 1 Run genReducedTreeData.py, which adds 'r_nodes' and 'r_edges' tables to data.db, using reducedTol/names.txt, and the 'nodes' and 'names' tables. 6 Other - 1 Can run genEnwikiNameData.py, which adds more entries to the 'names' table, + - Optionally run genEnwikiNameData.py, which adds more entries to the 'names' table, using data in enwiki/enwikiData.db, and the 'names' and 'descs' tables. - 2 //node-trimming data.db Tables ============== - nodes: name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT - edges: node TEXT, child TEXT, p\_support INT, PRIMARY KEY (node, child) - eol\_ids: id INT PRIMARY KEY, name TEXT -- names: name TEXT, alt\_name TEXT, pref\_alt INT, PRIMARY KEY(name, alt\_name) +- names: name TEXT, alt\_name TEXT, pref\_alt INT, src TEXT, PRIMARY KEY(name, alt\_name) - descs: name TEXT PRIMARY KEY, desc TEXT, redirected INT, wiki\_id INT, from\_dbp INT - images: id INT, src TEXT, url TEXT, license TEXT, artist TEXT, credit TEXT, PRIMARY KEY (id, src) - node\_imgs: id TEXT PRIMARY KEY, img\_id INT, src TEXT diff --git a/backend/data/genEnwikiNameData.py b/backend/data/genEnwikiNameData.py index b5d8670..1beb522 100755 --- a/backend/data/genEnwikiNameData.py +++ b/backend/data/genEnwikiNameData.py @@ -64,7 +64,7 @@ print(f"Left with {numAltNames} alt-names") print("Adding alt-names") for (nodeName, altNames) in nodeToAltNames.items(): for altName in altNames: - dbCur.execute("INSERT INTO names VALUES (?, ?, ?)", (nodeName, altName, 0)) + dbCur.execute("INSERT INTO names VALUES (?, ?, ?, 'enwiki')", (nodeName, altName, 0)) # Close dbs dbCon.commit() dbCon.close() 
diff --git a/backend/data/genEolNameData.py b/backend/data/genEolNameData.py index 1b80dd8..74b0e3d 100755 --- a/backend/data/genEolNameData.py +++ b/backend/data/genEolNameData.py @@ -76,7 +76,7 @@ print(f"Found {len(nameToPickedPid)}") dbCon = sqlite3.connect(dbFile) dbCur = dbCon.cursor() # Create tables -dbCur.execute("CREATE TABLE names(name TEXT, alt_name TEXT, pref_alt INT, PRIMARY KEY(name, alt_name))") +dbCur.execute("CREATE TABLE names(name TEXT, alt_name TEXT, pref_alt INT, src TEXT, PRIMARY KEY(name, alt_name))") dbCur.execute("CREATE INDEX names_alt_idx ON names(alt_name)") dbCur.execute("CREATE INDEX names_alt_idx_nc ON names(alt_name COLLATE NOCASE)") dbCur.execute("CREATE TABLE eol_ids(id INT PRIMARY KEY, name TEXT)") @@ -94,7 +94,7 @@ def addToDb(nodeName, pidToUse): altNames.add(n) for n in altNames: isPreferred = 1 if (n == preferredName) else 0 - dbCur.execute("INSERT INTO names VALUES (?, ?, ?)", (nodeName, n, isPreferred)) + dbCur.execute("INSERT INTO names VALUES (?, ?, ?, 'eol')", (nodeName, n, isPreferred)) for name in nameToPickedPid: # Add manually-picked pids pickedPid = nameToPickedPid[name] usedPids.add(pickedPid) |
