From 2ab48497797441164e7f57fca2660097d93398ca Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Mon, 25 Apr 2022 01:33:08 +1000 Subject: Adapt to handle open-tree-of-life data Added data_otol/ with script that converts data from 'Open Tree of Life' release 13.4 into a JSON form. Moved old tree-of-life data and images into data_tol_old/. Added TolMap type to tol.ts, changed TolNode, and adapted other code to handle it. Temporarily disabling tile images until image data is added. --- data_tol_old/genTestImgs.sh | 16 ++ data_tol_old/tolData.txt | 388 ++++++++++++++++++++++++++++++++++++++++++ data_tol_old/txtTreeToJSON.py | 76 +++++++++ 3 files changed, 480 insertions(+) create mode 100755 data_tol_old/genTestImgs.sh create mode 100644 data_tol_old/tolData.txt create mode 100755 data_tol_old/txtTreeToJSON.py (limited to 'data_tol_old') diff --git a/data_tol_old/genTestImgs.sh b/data_tol_old/genTestImgs.sh new file mode 100755 index 0000000..21b001b --- /dev/null +++ b/data_tol_old/genTestImgs.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -e + +#generate tolData.json from tolData.txt +cat tolData.txt | ./txtTreeToJSON.py > tolData.json + +#reads through tolData.json, gets names, and generates image for each name +cat tolData.json | \ + gawk 'match ($0, /"name"\s*:\s*"([^"]*)"/, arr) {print arr[1]}' | \ + while read; do + convert -size 200x200 xc:khaki +repage \ + -size 150x150 -fill black -background None \ + -font Ubuntu-Mono -gravity center caption:"$REPLY" +repage \ + -gravity Center -composite -strip ../public/img/"$REPLY".png + done + diff --git a/data_tol_old/tolData.txt b/data_tol_old/tolData.txt new file mode 100644 index 0000000..f73a064 --- /dev/null +++ b/data_tol_old/tolData.txt @@ -0,0 +1,388 @@ +Tree of Life + Viruses + Caudovirales + Herpesvirales + Ligamenvirales + Mononegavirales + Nidovirales + Picornavirales + Tymovirales + Archaea + Crenarchaeota + Euryarchaeota + Bacteria + Acidobacteria + Actinobacteria + Aquificae + Armatimonadetes + Bacteroidetes + Caldiserica 
+ Chlamydiae + Chlorobi + Chloroflexi + Chrysiogenetes + Cyanobacteria + Deferribacteres + Deinococcus-thermus + Dictyoglomi + Elusimicrobia + Fibrobacteres + Firmicutes + Fusobacteria + Gemmatimonadetes + Lentisphaerae + Nitrospira + Planctomycetes + Proteobacteria + Spirochaetae + Synergistetes + Tenericutes + Thermodesulfobacteria + Thermotogae + Verrucomicrobia + Eukaryota + Diatoms + Amoebozoa + Plantae + Rhodophyta + Viridiplantae + Prasinophytes + Ulvophyceae + Streptophyta + Charales + Embryophytes + Marchantiomorpha + Anthocerotophyta + Bryophyta + Lycopodiopsida + Lycopodiidae + Selaginellales + Polypodiopsida + Polypodiidae + Polypodiales + Equisetidae + Spermatopsida + Cycads + Conifers + Taxaceae + Cupressaceae + Pinaceae + Pinus + Picea + Larix + Cedrus + Abies + Ginkgos + Angiosperms + Illicium + magnoliids + Piperales + Piperaceae + Magnoliales + Annonaceae + Myristicaceae + Laurales + Lauraceae + Monocotyledons + Alismatanae + Aranae + Liliaceae + Asparagales + Amaryllidaceae + Asparagaceae + Asphodelaceae + Iridaceae + Orchidaceae + Dioscoreaceae + Arecanae + Cocoeae + Phoeniceae + Zingiberanae + Musaceae + Strelitziaceae + Zingiberaceae + Commelinanae + Bromeliaceae + Cyperaceae + Typhaceae + Poaceae + Zea mays + Triticum + Bambusoideae + eudicots + Ranunculales + Papaveraceae + Ranunculaceae + Proteales + Proteaceae + Nelumbo + Core Eudicots + Saxifragales + Rosids + Fabaceae + Mimosoideae + IRLC (Inverted Repeat-lacking clade) + Trifolieae + Fabeae + Rosales + Rosaceae + Rosa + Malus pumila + Ulmaceae + Urticaceae + Moraceae + Cannabaceae + Fagales + Fagaceae + Betulaceae + Juglandaceae + Cucurbitales + Cucurbitaceae + Malpighiales + Salicaceae + Violaceae + Passifloraceae + Erythroxylaceae + Rhizophoraceae + Euphorbiaceae + Linaceae + Rafflesiaceae + Myrtales + Myrtaceae + Onagraceae + Lythraceae + Brassicales + Caricaceae + Brassicaceae + Malvales + Core Malvales + Malvoideae + Bombacoideae + Sterculioideae + Helicteroideae + Byttnerioideae + 
Sapindales + Anacardiaceae + Burseraceae + Meliaceae + Rutaceae + Sapindaceae + Vitaceae + Caryophyllales + Polygonaceae + Droseraceae + Nepenthaceae + core Caryophyllales + Cactaceae + Amaranthaceae + Asterids + Ericales + Actinidiaceae + Ericaceae + Lecythidaceae + Sapotaceae + Ebenaceae + Theaceae + Solanales + Solanaceae + Convolvulaceae + Lamiales + Oleaceae + Fraxinus + Bignoniaceae + Pedaliaceae + Lentibulariaceae + Lamiaceae + Gentianales + Rubiaceae + Asterales + Campanulaceae + Asteraceae + Carduoideae + Cardueae + Cichorioideae + Cichorieae + Asteroideae + Asterodae + Helianthodae + Apiales + Apiaceae + Araliaceae + Aquifoliaceae + Fungi + Fungi 1 + Dikarya + Basidiomycota + Agaricomycotina + Agaricomycetes + Agaricomycetes 1 + Agaricomycetidae + Agaricales + Strophariaceae strict-sense + Psathyrellaceae + Agaricaceae + Nidulariaceae + Marasmiaceae + Physalacriaceae + Pleurotaceae + Amanitaceae + Podoserpula + Boletales + Serpulaceae + Sclerodermataceae + Boletaceae + Russulales + Hymenochaetales + Phallomycetidae + Geastrales + Gomphales + Phallales + Cantharellales + Auriculariales + Tremellomycetes + Ustilaginomycotina + Pucciniomycotina + Pucciniomycetes + Septobasidiales + Pucciniales + Mixiomycetes + Tritirachiomycetes + Entorrhizomycetes + Wallemiomycetes + Ascomycota + Pezizomycotina + Pezizomycetes + 'Leotiomyceta' + Eurotiomycetes + Geoglossaceae + Sordariomycetes + Hypocreomycetidae + Sordariomycetidae + Laboulbeniomycetes + Pleosporomycetidae + Saccharomycotina + Taphrinomycotina + Schizosaccharomycetes + Pneumocystidiomycetes + Taphrinomycetes + Glomeromycota + Zygomycota + Endogonales + Mucorales + Blastocladiomycota + Chytridiomycota + Neocallimastigomycota + Microsporidia + Animalia + Porifera + Cnidaria + Tardigrada + Annelida + Mollusca + Bivalvia + Gastropoda + Cephalopoda + Arthropoda + Arachnida + Araneae + Opiliones + Scorpiones + Heterostigmata + Crustacea + Euphausiacea + Brachyura + Isopoda + Cirripedia + Insecta + Anisoptera + 
#!/usr/bin/python3
"""Convert tab-indented tree text into JSON.

Each non-blank input line names one node, and the number of leading tab
characters gives the node's depth: depth 0 starts a new tree, and a node at
depth d is a child of the closest preceding node at depth d-1.

A single tree is written as one JSON object; several trees are written as a
JSON array of objects. A node is rendered as {"name": ...}, with a "children"
array added only when the node has children. Empty input produces no output.

Exits with status 1 (message on stderr) on unexpected arguments or on a line
that is indented more than one level deeper than the previous node.
"""

import json
import re
import sys

usageInfo = f"usage: {sys.argv[0]}\n"
usageInfo += "Reads, from stdin, tab-indented lines representing trees, and outputs corresponding JSON.\n"

# Compiled once; matches the (possibly empty) run of tabs that starts a line.
leadingTabs = re.compile(r"^\t*")


def parseTrees(lines):
    """Build trees from an iterable of tab-indented lines.

    Args:
        lines: iterable of strings, one per input line (trailing newlines
            and other trailing whitespace are ignored).

    Returns:
        A list of root nodes. Each node is a two-item list holding the node
        name and a list of child nodes.

    Raises:
        ValueError: if a line is indented although no root has been seen yet,
            or is more than one level deeper than the previous node. The
            message includes the 1-based line number.
    """
    trees = []
    path = []  # path[d] is the most recent node seen at depth d
    for lineNum, line in enumerate(lines, start=1):
        line = line.rstrip()
        if not line:
            # Blank (or whitespace-only) lines carry no node; skipping them
            # avoids emitting spurious empty-named roots. They still count
            # towards the reported line numbers.
            continue
        indent = len(leadingTabs.match(line).group())
        if indent > len(path):
            # A node may be at most one level deeper than the previous one.
            if not path:
                raise ValueError(f"Child without preceding root (line {lineNum})")
            raise ValueError(f"Child with invalid relative indent (line {lineNum})")
        newNode = [line[indent:], []]
        del path[indent:]  # drop everything at this depth or deeper
        siblings = path[-1][1] if path else trees
        siblings.append(newNode)
        path.append(newNode)
    return trees


def formatNode(node, indent):
    """Return the JSON text for `node`, each line prefixed with `indent`.

    The result has no trailing newline. Names are escaped with json.dumps so
    quotes, backslashes and control characters yield valid JSON;
    ensure_ascii=False keeps non-ASCII names readable as-is.
    """
    name, children = node
    head = indent + "{\"name\": " + json.dumps(name, ensure_ascii=False)
    if not children:
        return head + "}"
    body = ",\n".join(formatNode(child, indent + "\t") for child in children)
    return head + ", \"children\": [\n" + body + "\n" + indent + "]}"


def printNode(node, indent):
    """Print the JSON text for `node` to stdout, without a trailing newline."""
    print(formatNode(node, indent), end="")


def treesToJSON(trees):
    """Return the complete output text for a list of root nodes.

    One tree gives a single object; several trees give an array with one
    object per tree; no trees give an empty string.
    """
    if not trees:
        return ""
    text = ",\n".join(formatNode(tree, "") for tree in trees) + "\n"
    if len(trees) > 1:
        text = "[\n" + text + "]\n"
    return text


def main():
    """Command-line entry point: read stdin, write JSON to stdout."""
    if len(sys.argv) > 1:
        print(usageInfo, file=sys.stderr)
        sys.exit(1)
    try:
        trees = parseTrees(sys.stdin)
    except ValueError as err:
        # Report on stderr so the message never ends up inside a redirected
        # JSON output file.
        print(f"ERROR: {err}", file=sys.stderr)
        sys.exit(1)
    sys.stdout.write(treesToJSON(trees))


if __name__ == "__main__":
    main()