aboutsummaryrefslogtreecommitdiff
path: root/backend/data/dbpedia
diff options
context:
space:
mode:
Diffstat (limited to 'backend/data/dbpedia')
-rw-r--r-- backend/data/dbpedia/README.md | 45
-rwxr-xr-x backend/data/dbpedia/genDescData.py (renamed from backend/data/dbpedia/genData.py) | 2
2 files changed, 24 insertions, 23 deletions
diff --git a/backend/data/dbpedia/README.md b/backend/data/dbpedia/README.md
index 78e2a90..8a08f20 100644
--- a/backend/data/dbpedia/README.md
+++ b/backend/data/dbpedia/README.md
@@ -1,28 +1,29 @@
-Downloaded Files
-================
-- labels\_lang=en.ttl.bz2 <br>
- Obtained via https://databus.dbpedia.org/dbpedia/collections/latest-core,
- using the link <https://databus.dbpedia.org/dbpedia/generic/labels/2022.03.01/labels_lang=en.ttl.bz2>.
-- page\_lang=en\_ids.ttl.bz2 <br>
+This directory holds files obtained from/using [DBpedia](https://www.dbpedia.org).
+
+# Downloaded Files
+- `labels_lang=en.ttl.bz2` <br>
+ Obtained via https://databus.dbpedia.org/dbpedia/collections/latest-core.
+ Downloaded from <https://databus.dbpedia.org/dbpedia/generic/labels/2022.03.01/labels_lang=en.ttl.bz2>.
+- `page_lang=en_ids.ttl.bz2` <br>
Downloaded from <https://databus.dbpedia.org/dbpedia/generic/page/2022.03.01/page_lang=en_ids.ttl.bz2>.
-- redirects\_lang=en\_transitive.ttl.bz2 <br>
+- `redirects_lang=en_transitive.ttl.bz2` <br>
Downloaded from <https://databus.dbpedia.org/dbpedia/generic/redirects/2022.03.01/redirects_lang=en_transitive.ttl.bz2>.
-- disambiguations\_lang=en.ttl.bz2 <br>
+- `disambiguations_lang=en.ttl.bz2` <br>
Downloaded from <https://databus.dbpedia.org/dbpedia/generic/disambiguations/2022.03.01/disambiguations_lang=en.ttl.bz2>.
-- instance-types\_lang=en\_specific.ttl.bz2 <br>
+- `instance-types_lang=en_specific.ttl.bz2` <br>
Downloaded from <https://databus.dbpedia.org/dbpedia/mappings/instance-types/2022.03.01/instance-types_lang=en_specific.ttl.bz2>.
-- short-abstracts\_lang=en.ttl.bz2 <br>
+- `short-abstracts_lang=en.ttl.bz2` <br>
Downloaded from <https://databus.dbpedia.org/vehnem/text/short-abstracts/2021.05.01/short-abstracts_lang=en.ttl.bz2>.
-Generated Files
-===============
-- dbpData.db <br>
- An sqlite database representing data from the ttl files.
- Generated by running genData.py.
- Tables
- - labels: iri TEXT PRIMARY KEY, label TEXT
- - ids: iri TEXT PRIMARY KEY, id INT
- - redirects: iri TEXT PRIMARY KEY, target TEXT
- - disambiguations: iri TEXT PRIMARY KEY
- - types: iri TEXT, type TEXT
- - abstracts: iri TEXT PRIMARY KEY, abstract TEXT
+# Other Files
+- `genDescData.py` <br>
+ Used to generate a database representing data from the ttl files.
+- `descData.db` <br>
+ Generated by `genDescData.py`. <br>
+ Tables: <br>
+ - `labels`: `iri TEXT PRIMARY KEY, label TEXT`
+ - `ids`: `iri TEXT PRIMARY KEY, id INT`
+ - `redirects`: `iri TEXT PRIMARY KEY, target TEXT`
+ - `disambiguations`: `iri TEXT PRIMARY KEY`
+ - `types`: `iri TEXT, type TEXT`
+ - `abstracts`: `iri TEXT PRIMARY KEY, abstract TEXT`
diff --git a/backend/data/dbpedia/genData.py b/backend/data/dbpedia/genDescData.py
index 41c48a8..bba3ff5 100755
--- a/backend/data/dbpedia/genData.py
+++ b/backend/data/dbpedia/genDescData.py
@@ -16,7 +16,7 @@ redirectsFile = "redirects_lang=en_transitive.ttl.bz2"
disambigFile = "disambiguations_lang=en.ttl.bz2"
typesFile = "instance-types_lang=en_specific.ttl.bz2"
abstractsFile = "short-abstracts_lang=en.ttl.bz2"
-dbFile = "dbpData.db"
+dbFile = "descData.db"
# Open db
dbCon = sqlite3.connect(dbFile)