diff options
| -rw-r--r-- | backend/data/README.md | 1 | ||||
| -rwxr-xr-x | backend/data/dbpedia/genData.py | 26 | ||||
| -rwxr-xr-x | backend/data/downloadImgsForReview.py | 6 | ||||
| -rwxr-xr-x | backend/data/enwiki/genData.py | 10 | ||||
| -rwxr-xr-x | backend/data/enwiki/genDumpIndexDb.py | 8 | ||||
| -rwxr-xr-x | backend/data/enwiki/lookupPage.py | 4 | ||||
| -rwxr-xr-x | backend/data/genDbpConflicts.py | 16 | ||||
| -rwxr-xr-x | backend/data/genDbpData.py | 22 | ||||
| -rwxr-xr-x | backend/data/genEnwikiDescData.py | 8 | ||||
| -rwxr-xr-x | backend/data/genEnwikiNameData.py | 10 | ||||
| -rwxr-xr-x | backend/data/genEolNameData.py | 4 | ||||
| -rwxr-xr-x | backend/data/genImgsForWeb.py | 2 | ||||
| -rwxr-xr-x | backend/data/genLinkedImgs.py | 6 | ||||
| -rwxr-xr-x | backend/data/genOtolData.py | 12 | ||||
| -rwxr-xr-x | backend/data/genReducedTreeData.py | 16 | ||||
| -rwxr-xr-x | backend/data/reviewImgs.py | 15 | ||||
| -rwxr-xr-x | backend/server.py | 12 |
17 files changed, 89 insertions, 89 deletions
diff --git a/backend/data/README.md b/backend/data/README.md index 4655c2d..f090898 100644 --- a/backend/data/README.md +++ b/backend/data/README.md @@ -1,6 +1,5 @@ File Generation Process ======================= - 1 Tree Structure Data 1 Obtain data in otol/, as specified in it's README. 2 Run genOtolData.py, which creates data.db, and adds diff --git a/backend/data/dbpedia/genData.py b/backend/data/dbpedia/genData.py index 3df1961..7b48ac4 100755 --- a/backend/data/dbpedia/genData.py +++ b/backend/data/dbpedia/genData.py @@ -31,11 +31,11 @@ with bz2.open(labelsFile, mode='rt') as file: for line in file: lineNum += 1 if lineNum % 1e5 == 0: - print("Processing line {}".format(lineNum)) + print(f"Processing line {lineNum}") # match = labelLineRegex.fullmatch(line) if match == None: - print("ERROR: Line {} has unexpected format".format(lineNum), file=sys.stderr) + print(f"ERROR: Line {lineNum} has unexpected format", file=sys.stderr) sys.exit(1) else: dbCur.execute("INSERT INTO labels VALUES (?, ?)", (match.group(1), match.group(2))) @@ -49,18 +49,18 @@ with bz2.open(idsFile, mode='rt') as file: for line in file: lineNum += 1 if lineNum % 1e5 == 0: - print("Processing line {}".format(lineNum)) + print(f"Processing line {lineNum}") # match = idLineRegex.fullmatch(line) if match == None: - print("ERROR: Line {} has unexpected format".format(lineNum), file=sys.stderr) + print(f"ERROR: Line {lineNum} has unexpected format", file=sys.stderr) sys.exit(1) else: try: dbCur.execute("INSERT INTO ids VALUES (?, ?)", (match.group(1), int(match.group(2)))) except sqlite3.IntegrityError as e: # Accounts for certain lines that have the same IRI - print("Failed to add entry with IRI \"{}\": {}".format(match.group(1), e)) + print(f"Failed to add entry with IRI \"{match.group(1)}\": {e}") dbCon.commit() # Read/store redirects print("Reading/storing redirection data") @@ -71,11 +71,11 @@ with bz2.open(redirectsFile, mode='rt') as file: for line in file: lineNum += 1 if lineNum % 1e5 == 
0: - print("Processing line {}".format(lineNum)) + print(f"Processing line {lineNum}") # match = redirLineRegex.fullmatch(line) if match == None: - print("ERROR: Line {} has unexpected format".format(lineNum), file=sys.stderr) + print(f"ERROR: Line {lineNum} has unexpected format", file=sys.stderr) sys.exit(1) else: dbCur.execute("INSERT INTO redirects VALUES (?, ?)", (match.group(1), match.group(2))) @@ -89,11 +89,11 @@ with bz2.open(disambigFile, mode='rt') as file: for line in file: lineNum += 1 if lineNum % 1e5 == 0: - print("Processing line {}".format(lineNum)) + print(f"Processing line {lineNum}") # match = disambigLineRegex.fullmatch(line) if match == None: - print("ERROR: Line {} has unexpected format".format(lineNum), file=sys.stderr) + print(f"ERROR: Line {lineNum} has unexpected format", file=sys.stderr) sys.exit(1) else: disambigNames.add(match.group(1)) @@ -111,11 +111,11 @@ with bz2.open(typesFile, mode='rt') as file: for line in file: lineNum += 1 if lineNum % 1e5 == 0: - print("Processing line {}".format(lineNum)) + print(f"Processing line {lineNum}") # match = typeLineRegex.fullmatch(line) if match == None: - print("ERROR: Line {} has unexpected format".format(lineNum), file=sys.stderr) + print(f"ERROR: Line {lineNum} has unexpected format", file=sys.stderr) sys.exit(1) else: dbCur.execute("INSERT INTO types VALUES (?, ?)", (match.group(1), match.group(2))) @@ -129,13 +129,13 @@ with bz2.open(abstractsFile, mode='rt') as file: for line in file: lineNum += 1 if lineNum % 1e5 == 0: - print("Processing line {}".format(lineNum)) + print(f"Processing line {lineNum}") # if line[0] == "#": continue match = descLineRegex.fullmatch(line) if match == None: - print("ERROR: Line {} has unexpected format".format(lineNum), file=sys.stderr) + print(f"ERROR: Line {lineNum} has unexpected format", file=sys.stderr) sys.exit(1) else: dbCur.execute("INSERT INTO abstracts VALUES (?, ?)", diff --git a/backend/data/downloadImgsForReview.py 
b/backend/data/downloadImgsForReview.py index 5556fce..d1191d7 100755 --- a/backend/data/downloadImgsForReview.py +++ b/backend/data/downloadImgsForReview.py @@ -56,7 +56,7 @@ def downloadImg(url, outFile): file.write(data.content) time.sleep(random.random() * (POST_DL_DELAY_MAX - POST_DL_DELAY_MIN) + POST_DL_DELAY_MIN) except Exception as e: - print("Error while downloading to {}: {}".format(outFile, str(e)), file=sys.stderr) + print(f"Error while downloading to {outFile}: {str(e)}", file=sys.stderr) threadException = e numThreads -= 1 # Create output directory if not present @@ -113,7 +113,7 @@ for idx in range(nextIdx, len(eolIds)): urlParts = urllib.parse.urlparse(url) extension = os.path.splitext(urlParts.path)[1] if len(extension) <= 1: - print("WARNING: No filename extension found in URL {}".format(url), file=sys.stderr) + print(f"WARNING: No filename extension found in URL {url}", file=sys.stderr) continue outFiles.append(str(eolId) + " " + str(contentId) + extension) urls.append(url) @@ -132,7 +132,7 @@ for idx in range(nextIdx, len(eolIds)): time.sleep(1) exitLoop = True break - print("Downloading image to {}".format(outPath)) + print(f"Downloading image to {outPath}") # Perform download numThreads += 1 thread = Thread(target=downloadImg, args=(urls[i], outPath), daemon=True) diff --git a/backend/data/enwiki/genData.py b/backend/data/enwiki/genData.py index 646292c..f439d11 100755 --- a/backend/data/enwiki/genData.py +++ b/backend/data/enwiki/genData.py @@ -24,9 +24,9 @@ parensGrpRegex = re.compile(r" \([^()]*\)") leftoverBraceRegex = re.compile(r"(?:{\||{{).*") def convertTemplateReplace(match): if match.group(2) == None: - return "{} {}".format(match.group(1), match.group(4)) + return f"{match.group(1)} {match.group(4)}" else: - return "{} {} {} {}".format(match.group(1), match.group(2), match.group(3), match.group(4)) + return f"{match.group(1)} {match.group(2)} {match.group(3)} {match.group(4)}" def parseDesc(text): # Find first matching line outside a 
{{...}} and [[...]] block-html-comments, then accumulate lines until a blank # Some cases not accounted for: disambiguation pages, abstracts with sentences split-across-lines, @@ -83,7 +83,7 @@ def convertTitle(title): # Check for existing db if os.path.exists(enwikiDb): - print("ERROR: Existing {}".format(enwikiDb), file=sys.stderr) + print(f"ERROR: Existing {enwikiDb}", file=sys.stderr) sys.exit(1) # Create db dbCon = sqlite3.connect(enwikiDb) @@ -101,14 +101,14 @@ with bz2.open(dumpFile, mode='rt') as file: for page in dump: pageNum += 1 if pageNum % 1e4 == 0: - print("At page {}".format(pageNum)) + print(f"At page {pageNum}") # Parse page if page.namespace == 0: try: dbCur.execute("INSERT INTO pages VALUES (?, ?)", (page.id, convertTitle(page.title))) except sqlite3.IntegrityError as e: # Accounts for certain pages that have the same title - print("Failed to add page with title \"{}\": {}".format(page.title, e)) + print(f"Failed to add page with title \"{page.title}\": {e}") continue if page.redirect != None: dbCur.execute("INSERT INTO redirects VALUES (?, ?)", (page.id, convertTitle(page.redirect))) diff --git a/backend/data/enwiki/genDumpIndexDb.py b/backend/data/enwiki/genDumpIndexDb.py index 13f7eb6..450754b 100755 --- a/backend/data/enwiki/genDumpIndexDb.py +++ b/backend/data/enwiki/genDumpIndexDb.py @@ -16,7 +16,7 @@ indexDb = "dumpIndex.db" # Check for existing db if os.path.exists(indexDb): - print("ERROR: Existing {}".format(indexDb), file=sys.stderr) + print(f"ERROR: Existing {indexDb}", file=sys.stderr) sys.exit(1) # Create db dbCon = sqlite3.connect(indexDb) @@ -31,7 +31,7 @@ with bz2.open(indexFile, mode='rt') as file: for line in file: lineNum += 1 if lineNum % 1e5 == 0: - print("At line {}".format(lineNum)) + print(f"At line {lineNum}") # match = lineRegex.fullmatch(line.rstrip()) (offset, _, title) = match.group(1,2,3) @@ -42,7 +42,7 @@ with bz2.open(indexFile, mode='rt') as file: dbCur.execute("INSERT INTO offsets VALUES (?, ?, ?)", (t, 
lastOffset, offset)) except sqlite3.IntegrityError as e: # Accounts for certain entries in the file that have the same title - print("Failed on title \"{}\": {}".format(t, e)) + print(f"Failed on title \"{t}\": {e}") titlesToAdd = [] lastOffset = offset titlesToAdd.append(title) @@ -50,7 +50,7 @@ for title in titlesToAdd: try: dbCur.execute("INSERT INTO offsets VALUES (?, ?, ?)", (title, lastOffset, -1)) except sqlite3.IntegrityError as e: - print("Failed on title \"{}\": {}".format(t, e)) + print(f"Failed on title \"{title}\": {e}") # Close db dbCon.commit() dbCon.close() diff --git a/backend/data/enwiki/lookupPage.py b/backend/data/enwiki/lookupPage.py index c795c35..1d379e7 100755 --- a/backend/data/enwiki/lookupPage.py +++ b/backend/data/enwiki/lookupPage.py @@ -26,7 +26,7 @@ if row == None: sys.exit(0) (_, pageOffset, endOffset) = row dbCon.close() -print("Found chunk at offset {}".format(pageOffset)) +print(f"Found chunk at offset {pageOffset}") # Read dump file print("Reading dump file") content = [] @@ -51,7 +51,7 @@ with open(dumpFile, mode='rb') as file: titleLine = lines[lineIdx] if titleLine.lstrip() == '<title>' + pageTitle + '</title>': found = True - print("Found title in chunk as page {}".format(pageNum)) + print(f"Found title in chunk as page {pageNum}") content.append(line) content.append(titleLine) while True: diff --git a/backend/data/genDbpConflicts.py b/backend/data/genDbpConflicts.py index 0ad4e1e..c0d3704 100755 --- a/backend/data/genDbpConflicts.py +++ b/backend/data/genDbpConflicts.py @@ -45,7 +45,7 @@ iterNum = 0 for (label,) in dbpCur.execute("SELECT label from labels"): iterNum += 1 if iterNum % 1e5 == 0: - print("Processing line {}".format(iterNum)) + print(f"Processing line {iterNum}") # if label in disambigLabels: continue @@ -70,7 +70,7 @@ for (name, variants) in nameToVariants.items(): namesToRemove.add(name) for name in namesToRemove: del nameToVariants[name] -print("Number of conflicts: {}".format(len(nameToVariants))) 
+print(f"Number of conflicts: {len(nameToVariants)}") # Try conflict resolution via taxon-type information print("Resolving conflicts using instance-type data") taxonTypes = { # Obtained from the DBpedia ontology @@ -109,7 +109,7 @@ iterNum = 0 for (label, type) in dbpCur.execute("SELECT label, type from labels INNER JOIN types on labels.iri = types.iri"): iterNum += 1 if iterNum % 1e5 == 0: - print("Processing line {}".format(iterNum)) + print(f"Processing line {iterNum}") # if type in taxonTypes: name = label.lower() @@ -121,7 +121,7 @@ for (label, type) in dbpCur.execute("SELECT label, type from labels INNER JOIN t name = match.group(1) if name in nameToVariants: del nameToVariants[name] -print("Number of conflicts: {}".format(len(nameToVariants))) +print(f"Number of conflicts: {len(nameToVariants)}") # Try conflict resolution via category-list # Does a generic-category pass first (avoid stuff like Pan being classified as a horse instead of an ape) print("Resolving conflicts using category-list") @@ -163,7 +163,7 @@ for (name, variants) in nameToVariants.items(): break for name in namesToRemove: del nameToVariants[name] -print("Number of conflicts: {}".format(len(nameToVariants))) +print(f"Number of conflicts: {len(nameToVariants)}") # Find descriptions for plain-named labels print("Finding descriptions for plain-named labels") labelToDesc = {} @@ -172,7 +172,7 @@ query = "SELECT label, abstract from labels INNER JOIN abstracts ON labels.iri = for (label, desc,) in dbpCur.execute(query): iterNum += 1 if iterNum % 1e5 == 0: - print("Processing line {}".format(iterNum)) + print(f"Processing line {iterNum}") # if label.lower() in nameToVariants: labelToDesc[label] = desc @@ -183,7 +183,7 @@ query = "SELECT label, abstract from labels" \ for (label, desc,) in dbpCur.execute(query): iterNum += 1 if iterNum % 1e5 == 0: - print("Processing line {}".format(iterNum)) + print(f"Processing line {iterNum}") # if label.lower() in nameToVariants: labelToDesc[label] = desc @@ 
-196,7 +196,7 @@ with open(outFile, "w") as file: file.write("\n") for n in variants: if n in labelToDesc: - file.write("\t{}: {}\n".format(n, labelToDesc[n])) + file.write(f"\t{n}: {labelToDesc[n]}\n") # Close dbs dbCon.close() dbpCon.close() diff --git a/backend/data/genDbpData.py b/backend/data/genDbpData.py index c8394ea..0655344 100755 --- a/backend/data/genDbpData.py +++ b/backend/data/genDbpData.py @@ -41,7 +41,7 @@ iterNum = 0 for (label,) in dbpCur.execute("SELECT label from labels"): iterNum += 1 if iterNum % 1e5 == 0: - print("Processing line {}".format(iterNum)) + print(f"Processing line {iterNum}") # if label in disambigLabels: continue @@ -66,7 +66,7 @@ for (name, variants) in nameToVariants.items(): for name in nodeToLabel: del nameToVariants[name] nodeToLabel["cellular organisms"] = "organism" # Special case for root node -print("Number of conflicts: {}".format(len(nameToVariants))) +print(f"Number of conflicts: {len(nameToVariants)}") # Try conflict resolution via picked-labels print("Resolving conflicts using picked-labels") with open(pickedLabelsFile) as file: @@ -79,15 +79,15 @@ with open(pickedLabelsFile) as file: else: match = nameVariantRegex.match(pickedLabel) if match == None: - print("WARNING: Picked label {} not found (1)".format(pickedLabel), file=sys.stderr) + print(f"WARNING: Picked label {pickedLabel} not found (1)", file=sys.stderr) else: name = match.group(1) if name not in nameToVariants: - print("WARNING: Picked label {} not found (2)".format(pickedLabel), file=sys.stderr) + print(f"WARNING: Picked label {pickedLabel} not found (2)", file=sys.stderr) else: nodeToLabel[name] = pickedLabel del nameToVariants[name] -print("Number of conflicts: {}".format(len(nameToVariants))) +print(f"Number of conflicts: {len(nameToVariants)}") # Try conflict resolution via category-list # Does a generic-category pass first (avoid stuff like Pan being classified as a horse instead of an ape) print("Resolving conflicts using category-list") @@ -131,7 
+131,7 @@ for (name, variants) in nameToVariants.items(): break for name in namesToRemove: del nameToVariants[name] -print("Number of conflicts: {}".format(len(nameToVariants))) +print(f"Number of conflicts: {len(nameToVariants)}") # Try conflict resolution via taxon-type information print("Resolving conflicts using instance-type data") taxonTypes = { # Obtained from the DBpedia ontology @@ -170,7 +170,7 @@ iterNum = 0 for (label, type) in dbpCur.execute("SELECT label, type from labels INNER JOIN types on labels.iri = types.iri"): iterNum += 1 if iterNum % 1e5 == 0: - print("Processing line {}".format(iterNum)) + print(f"Processing line {iterNum}") # if type in taxonTypes: name = label.lower() @@ -184,7 +184,7 @@ for (label, type) in dbpCur.execute("SELECT label, type from labels INNER JOIN t if name in nameToVariants: nodeToLabel[name] = label del nameToVariants[name] -print("Number of conflicts: {}".format(len(nameToVariants))) +print(f"Number of conflicts: {len(nameToVariants)}") # Associate nodes with IRIs print("Getting nodes IRIs") nodeToIri = {} @@ -192,7 +192,7 @@ iterNum = 0 for (name, label) in nodeToLabel.items(): row = dbpCur.execute("SELECT iri FROM labels where label = ? 
COLLATE NOCASE", (label,)).fetchone() if row == None: - print("ERROR: Couldn't find label {}".format(label), file=sys.stderr) + print(f"ERROR: Couldn't find label {label}", file=sys.stderr) sys.exit(1) else: nodeToIri[name] = row[0] @@ -203,7 +203,7 @@ iterNum = 0 for (name, iri) in nodeToIri.items(): iterNum += 1 if iterNum % 1e4 == 0: - print("At iteration {}".format(iterNum)) + print(f"At iteration {iterNum}") # row = dbpCur.execute("SELECT target FROM redirects where iri = ?", (iri,)).fetchone() if row != None: @@ -216,7 +216,7 @@ iterNum = 0 for (name, iri) in nodeToIri.items(): iterNum += 1 if iterNum % 1e4 == 0: - print("At iteration {}".format(iterNum)) + print(f"At iteration {iterNum}") # query = "SELECT abstract, id FROM abstracts INNER JOIN ids ON abstracts.iri = ids.iri WHERE ids.iri = ?" row = dbpCur.execute(query, (iri,)).fetchone() diff --git a/backend/data/genEnwikiDescData.py b/backend/data/genEnwikiDescData.py index 40a6c92..57e4194 100755 --- a/backend/data/genEnwikiDescData.py +++ b/backend/data/genEnwikiDescData.py @@ -25,7 +25,7 @@ nodeNames = set() query = "SELECT nodes.name FROM nodes LEFT JOIN descs ON nodes.name = descs.name WHERE desc IS NULL" for row in dbCur.execute(query): nodeNames.add(row[0]) -print("Found {} names".format(len(nodeNames))) +print(f"Found {len(nodeNames)} names") # Find page id for each node name print("Getting node page-ids") nodeToPageId = {} @@ -33,7 +33,7 @@ iterNum = 0 for name in nodeNames: iterNum += 1 if iterNum % 1e4 == 0: - print("At iteration {}".format(iterNum)) + print(f"At iteration {iterNum}") # row = enwikiCur.execute("SELECT id FROM pages WHERE pages.title = ? 
COLLATE NOCASE", (name,)).fetchone() if row != None: @@ -45,7 +45,7 @@ iterNum = 0 for (name, pageId) in nodeToPageId.items(): iterNum += 1 if iterNum % 1000 == 0: - print("At iteration {}".format(iterNum)) + print(f"At iteration {iterNum}") # row = enwikiCur.execute( "SELECT pages.id FROM redirects INNER JOIN pages ON redirects.target = pages.title WHERE redirects.id = ?", @@ -59,7 +59,7 @@ iterNum = 0 for (name, pageId) in nodeToPageId.items(): iterNum += 1 if iterNum % 1000 == 0: - print("At iteration {}".format(iterNum)) + print(f"At iteration {iterNum}") # row = enwikiCur.execute("SELECT desc FROM descs where descs.id = ?", (pageId,)).fetchone() if row != None: diff --git a/backend/data/genEnwikiNameData.py b/backend/data/genEnwikiNameData.py index dfed46c..fd50338 100755 --- a/backend/data/genEnwikiNameData.py +++ b/backend/data/genEnwikiNameData.py @@ -25,7 +25,7 @@ print("Getting nodes with wiki IDs") nodeToWikiId = {} for row in dbCur.execute("SELECT name, wiki_id from descs"): nodeToWikiId[row[0]] = row[1] -print("Found {} nodes".format(len(nodeToWikiId))) +print(f"Found {len(nodeToWikiId)} nodes") # Find wiki-ids that redirect to each node print("Finding redirecter names") nodeToAltNames = {} @@ -34,7 +34,7 @@ iterNum = 0 for (nodeName, wikiId) in nodeToWikiId.items(): iterNum += 1 if iterNum % 1e4 == 0: - print("At iteration {}".format(iterNum)) + print(f"At iteration {iterNum}") # nodeToAltNames[nodeName] = set() query = "SELECT p1.title FROM pages p1" \ @@ -44,7 +44,7 @@ for (nodeName, wikiId) in nodeToWikiId.items(): if altNameRegex.fullmatch(name) != None: nodeToAltNames[nodeName].add(name.lower()) numAltNames += 1 -print("Found {} alt-names".format(numAltNames)) +print(f"Found {numAltNames} alt-names") # Remove existing alt-names print("Removing existing alt-names") query = "SELECT alt_name FROM names WHERE alt_name IN ({})" @@ -52,14 +52,14 @@ iterNum = 0 for (nodeName, altNames) in nodeToAltNames.items(): iterNum += 1 if iterNum % 1e4 == 0: - 
print("At iteration {}".format(iterNum)) + print(f"At iteration {iterNum}") # existingNames = set() for (name,) in dbCur.execute(query.format(",".join(["?"] * len(altNames))), list(altNames)): existingNames.add(name) numAltNames -= len(existingNames) altNames.difference_update(existingNames) -print("Left with {} alt-names".format(numAltNames)) +print(f"Left with {numAltNames} alt-names") # Add alt-names print("Adding alt-names") for (nodeName, altNames) in nodeToAltNames.items(): diff --git a/backend/data/genEolNameData.py b/backend/data/genEolNameData.py index fb91e8a..e7eccba 100755 --- a/backend/data/genEolNameData.py +++ b/backend/data/genEolNameData.py @@ -77,7 +77,7 @@ for row in dbCur2.execute("SELECT name FROM nodes"): name = row[0] iterationNum += 1 if iterationNum % 10000 == 0: - print("Loop 1 iteration {}".format(iterationNum)) + print(f"Loop 1 iteration {iterationNum}") # If name matches a canonical-name, add alt-name entries to 'names' table if name in canonicalNameToPids: pidToUse = 0 @@ -103,7 +103,7 @@ iterationNum = 0 for name in unresolvedNodeNames: iterationNum += 1 if iterationNum % 100 == 0: - print("Loop 2 iteration {}".format(iterationNum)) + print(f"Loop 2 iteration {iterationNum}") # Add alt-name entries to 'names' table for first corresponding pid pidToUse = 0 for pid in nameToPids[name]: diff --git a/backend/data/genImgsForWeb.py b/backend/data/genImgsForWeb.py index 91a1cde..9db260f 100755 --- a/backend/data/genImgsForWeb.py +++ b/backend/data/genImgsForWeb.py @@ -76,7 +76,7 @@ for i in range(inputImgIdx, len(inputImgList)): imgName = inputImgList[i] [eolIdStr, otherStr] = imgName.split(" ") contentId = int(otherStr.split(".")[0]) - print("Converting {}".format(imgName)) + print(f"Converting {imgName}") subprocess.run( ['npx', 'smartcrop-cli', '--width', str(IMG_OUT_SZ), diff --git a/backend/data/genLinkedImgs.py b/backend/data/genLinkedImgs.py index 2e3fdce..6e26ca4 100755 --- a/backend/data/genLinkedImgs.py +++ 
b/backend/data/genLinkedImgs.py @@ -27,7 +27,7 @@ query = "SELECT nodes.name, eol_ids.id FROM" \ " INNER JOIN images ON eol_ids.id = images.eol_id" for (name, eolId) in dbCur.execute(query): resolvedNodes[name] = eolId -print("Got {} nodes".format(len(resolvedNodes))) +print(f"Got {len(resolvedNodes)} nodes") # Iterate through resolved nodes, resolving ancestors where able print("Resolving ancestor nodes") nodesToResolve = {} @@ -37,7 +37,7 @@ iterNum = 0 while len(resolvedNodes) > 0: iterNum += 1 if iterNum % 1e3 == 0: - print("At iteration {}".format(iterNum)) + print(f"At iteration {iterNum}") # Get next node (nodeName, eolId) = resolvedNodes.popitem() processedNodes[nodeName] = eolId @@ -85,7 +85,7 @@ iterNum = 0 for nodeName in processedNodes.keys(): iterNum += 1 if iterNum % 1e3 == 0: - print("At iteration {}".format(iterNum)) + print(f"At iteration {iterNum}") # match = compoundNameRegex.fullmatch(nodeName) if match != None: diff --git a/backend/data/genOtolData.py b/backend/data/genOtolData.py index 87db2c4..2ea548a 100755 --- a/backend/data/genOtolData.py +++ b/backend/data/genOtolData.py @@ -131,13 +131,13 @@ def parseNewickName(): elif name[0] == "'": match = re.fullmatch(r"'([^\\\"]+) (ott\d+)'", name) if match == None: - raise Exception("ERROR: invalid name \"{}\"".format(name)) + raise Exception(f"ERROR: invalid name \"{name}\"") name = match.group(1).replace("''", "'") return [name, match.group(2)] else: match = re.fullmatch(r"([^\\\"]+)_(ott\d+)", name) if match == None: - raise Exception("ERROR: invalid name \"{}\"".format(name)) + raise Exception(f"ERROR: invalid name \"{name}\"") return [match.group(1).replace("_", " "), match.group(2)] rootId = parseNewick() # For nodes with *many* children, remove some of those children @@ -148,8 +148,8 @@ if os.path.exists(keptNamesFile): for line in file: namesToKeep.add(line.rstrip()) else: - print("WARNING: No '{}' file found".format(keptNamesFile)) -print("Read in {} nodes".format(len(namesToKeep))) + 
print(f"WARNING: No '{keptNamesFile}' file found") +print(f"Read in {len(namesToKeep)} nodes") keptAncestors = set() for name in namesToKeep: if name in nameToFirstId: @@ -237,7 +237,7 @@ def convertMrcaName(id): name = node["name"] childIds = node["children"] if len(childIds) < 2: - print("WARNING: MRCA node \"{}\" has less than 2 children".format(name), file=sys.stderr) + print(f"WARNING: MRCA node \"{name}\" has less than 2 children", file=sys.stderr) return # Get 2 children with most tips childTips = [nodeMap[id]["tips"] for id in childIds] @@ -261,7 +261,7 @@ def convertMrcaName(id): if match != None: childName2 = match.group(1) # Create composite name - node["name"] = "[{} + {}]".format(childName1, childName2) + node["name"] = f"[{childName1} + {childName2}]" return childName1 for [id, node] in nodeMap.items(): if node["name"].startswith("mrca"): diff --git a/backend/data/genReducedTreeData.py b/backend/data/genReducedTreeData.py index 9cdf1d6..a88573d 100755 --- a/backend/data/genReducedTreeData.py +++ b/backend/data/genReducedTreeData.py @@ -26,7 +26,7 @@ with open(nodeNamesFile) as file: for line in file: iterNum += 1 if iterNum % 100 == 0: - print("Iteration {}".format(iterNum)) + print(f"Iteration {iterNum}") # row = dbCur.execute("SELECT name from nodes WHERE name = ?", (line.rstrip(),)).fetchone() if row == None: @@ -36,7 +36,7 @@ with open(nodeNamesFile) as file: if len(minimalNames) == 0: print("ERROR: No names found", file=sys.stderr) sys.exit(1) -print("Name set has {} names".format(len(minimalNames))) +print(f"Name set has {len(minimalNames)} names") # Add nodes that connect up to root print("Getting connected nodes set") iterNum = 0 @@ -44,7 +44,7 @@ rootName = None for name in minimalNames: iterNum += 1 if iterNum % 100 == 0: - print("Iteration {}".format(iterNum)) + print(f"Iteration {iterNum}") # prevName = None while name != None: @@ -67,7 +67,7 @@ for name in minimalNames: break if name == None: rootName = prevName -print("New node set has 
{} nodes".format(len(nodeMap))) +print(f"New node set has {len(nodeMap)} nodes") # Remove certain 'chain collapsible' nodes print("Removing 'chain collapsible' nodes") namesToRemove = set() @@ -85,7 +85,7 @@ for (name, nodeObj) in nodeMap.items(): namesToRemove.add(name) for name in namesToRemove: del nodeMap[name] -print("New node set has {} nodes".format(len(nodeMap))) +print(f"New node set has {len(nodeMap)} nodes") # Merge-upward compsite-named nodes print("Merging-upward composite-named nodes") namesToRemove2 = set() @@ -103,7 +103,7 @@ for (name, nodeObj) in nodeMap.items(): for name in namesToRemove2: del nodeMap[name] namesToRemove.add(name) -print("New node set has {} nodes".format(len(nodeMap))) +print(f"New node set has {len(nodeMap)} nodes") # Add some connected children print("Adding additional nearby children") namesToAdd = [] @@ -111,7 +111,7 @@ iterNum = 0 for (name, nodeObj) in nodeMap.items(): iterNum += 1 if iterNum % 100 == 0: - print("Iteration {}".format(iterNum)) + print(f"Iteration {iterNum}") # numChildren = len(nodeObj["children"]) if numChildren < PREF_NUM_CHILDREN: @@ -131,7 +131,7 @@ for name in namesToAdd: "tips": 0, "pSupport": pSupport == 1, } -print("New node set has {} nodes".format(len(nodeMap))) +print(f"New node set has {len(nodeMap)} nodes") # set tips vals print("Setting tips vals") def setTips(nodeName): diff --git a/backend/data/reviewImgs.py b/backend/data/reviewImgs.py index 037ef07..63e7dd5 100755 --- a/backend/data/reviewImgs.py +++ b/backend/data/reviewImgs.py @@ -135,11 +135,12 @@ class EolImgReviewer: row = dbCur.execute(query, (self.nextEolId,)).fetchone() if row != None: commonName = row[1] - self.root.title("Reviewing EOL ID {}, aka \"{}\" (imgs {} to {} out of {})".format( - self.nextEolId, commonName, firstImgIdx, lastImgIdx, len(self.imgList))) + self.root.title( + f"Reviewing EOL ID {self.nextEolId}, aka \"{commonName}\"" \ + f" (imgs {firstImgIdx} to {lastImgIdx} out of {len(self.imgList)})") else: - 
self.root.title("Reviewing EOL ID {} (imgs {} to {} out of {})".format( - self.nextEolId, firstImgIdx, lastImgIdx, len(self.imgList))) + self.root.title( + f"Reviewing EOL ID {self.nextEolId} (imgs {firstImgIdx} to {lastImgIdx} out of {len(self.imgList)})") def accept(self, imgIdx): """ React to a user selecting an image """ if imgIdx >= len(self.nextImgNames): @@ -173,11 +174,11 @@ class EolImgReviewer: self.labels[imgIdx].config(image=self.photoImgs[imgIdx]) self.rotations[imgIdx] = (self.rotations[imgIdx] + deg) % 360 def quit(self, e = None): - print("Number reviewed: {}".format(self.numReviewed)) + print(f"Number reviewed: {self.numReviewed}") timeElapsed = time.time() - self.startTime - print("Time elapsed: {:.2f} seconds".format(timeElapsed)) + print(f"Time elapsed: {timeElapsed:.2f} seconds") if self.numReviewed > 0: - print("Avg time per review: {:.2f} seconds".format(timeElapsed / self.numReviewed)) + print(f"Avg time per review: {timeElapsed/self.numReviewed:.2f} seconds") dbCon.close() self.root.destroy() def resizeForDisplay(self, img): diff --git a/backend/server.py b/backend/server.py index a712216..c564466 100755 --- a/backend/server.py +++ b/backend/server.py @@ -35,7 +35,7 @@ def lookupNodes(names, useReducedTree): nodesTable = "nodes" if not useReducedTree else "r_nodes" edgesTable = "edges" if not useReducedTree else "r_edges" queryParamStr = ",".join(["?"] * len(names)) - query = "SELECT name, tips FROM {} WHERE name IN ({})".format(nodesTable, queryParamStr) + query = f"SELECT name, tips FROM {nodesTable} WHERE name IN ({queryParamStr})" for (nodeName, tips) in cur.execute(query, names): nodeObjs[nodeName] = { "children": [], @@ -45,10 +45,10 @@ def lookupNodes(names, useReducedTree): "commonName": None, "imgName": None, } - query = "SELECT node, child FROM {} WHERE node IN ({})".format(edgesTable, queryParamStr) + query = f"SELECT node, child FROM {edgesTable} WHERE node IN ({queryParamStr})" for (nodeName, childName) in cur.execute(query, 
names): nodeObjs[nodeName]["children"].append(childName) - query = "SELECT node, child, p_support FROM {} WHERE child IN ({})".format(edgesTable, queryParamStr) + query = f"SELECT node, child, p_support FROM {edgesTable} WHERE child IN ({queryParamStr})" for (nodeName, childName, pSupport) in cur.execute(query, names): nodeObjs[childName]["parent"] = None if nodeName == "" else nodeName nodeObjs[childName]["pSupport"] = (pSupport == 1) @@ -72,7 +72,7 @@ def lookupNodes(names, useReducedTree): str(eolId2) + ".jpg" if eolId2 != 0 else None, ] # Get preferred-name info - query = "SELECT name, alt_name FROM names WHERE pref_alt = 1 AND name IN ({})".format(queryParamStr) + query = f"SELECT name, alt_name FROM names WHERE pref_alt = 1 AND name IN ({queryParamStr})" for (name, altName) in cur.execute(query, names): if altName != name: nodeObjs[name]["commonName"] = altName @@ -185,7 +185,7 @@ class DbServer(BaseHTTPRequestHandler): if not ranOnce: self.respondJson(results) return - print("ERROR: Parent-chain node {} not found".format(name), file=sys.stderr) + print(f"ERROR: Parent-chain node {name} not found", file=sys.stderr) break nodeObj = nodeObjs[name] results[name] = nodeObj @@ -221,7 +221,7 @@ class DbServer(BaseHTTPRequestHandler): self.wfile.write(json.dumps(val).encode("utf-8")) server = HTTPServer((hostname, port), DbServer) -print("Server started at http://{}:{}".format(hostname, port)) +print(f"Server started at http://{hostname}:{port}") try: server.serve_forever() except KeyboardInterrupt: |
