1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
|
#!/usr/bin/python3
"""
Adds data about event distribution and displayability to the database.
"""
# Enable unit testing code to, when running this script, resolve imports of modules within this directory
import os, sys
parentDir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(parentDir)
import sqlite3
from cal import gregorianToJdn, jdnToGregorian
# Sentinel scale values, used for units smaller than a year
MONTH_SCALE = -1
DAY_SCALE = -2
# Scales to generate data for
# Each positive scale is the number of years spanned by one unit (10^9 down to 1),
# and these are followed by the month and day sentinels
SCALES: list[int] = [10 ** exponent for exponent in range(9, -1, -1)] + [MONTH_SCALE, DAY_SCALE]
# Max number of events marked as displayable for each (scale, unit) pair
MAX_DISPLAYED_PER_UNIT = 4
#
# Database file used when this script is run directly
DB_FILE = 'data.db'
def _getUnit(eventStart: int, fmt: int, scale: int) -> int:
    """
    Returns the unit, on the given scale, that an event's start falls into.

    When fmt is 0, eventStart is used directly as a year. Otherwise it is passed
    to jdnToGregorian() to obtain a (year, month, day) date.
    For scales >= 1, the unit is the start year floor-divided by the scale.
    For MONTH_SCALE, the unit is the day number of the first day of the start's month.
    For any other scale (expected to be DAY_SCALE), the unit is the day number of the start.
    A year-only start maps to the day number of January 1st of that year, for both month and day scales.
    """
    if scale >= 1:
        year = eventStart if fmt == 0 else jdnToGregorian(eventStart)[0]
        return year // scale
    if fmt == 0:
        # Year-only start: month and day scales both use the first day of the year
        return gregorianToJdn(eventStart, 1, 1)
    if scale == MONTH_SCALE:
        year, month, day = jdnToGregorian(eventStart)
        # Avoid a redundant conversion if the start is already the first of the month
        return eventStart if day == 1 else gregorianToJdn(year, month, 1)
    return eventStart # scale == DAY_SCALE

def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None:
    """
    Reads events from the database, and adds tables describing event distribution and displayability.

    Only events that have an entry in the 'pop' table are considered, in order of decreasing popularity.
    For each scale, each event is assigned to a unit, and two things are recorded:
    - In table 'dist': the number of events in each (scale, unit) pair
    - In table 'event_disp': the (event id, scale) pairs for which the event was among the
      first maxDisplayedPerUnit events (by popularity) assigned to its unit

    dbFile: Path to the sqlite database, which should have 'events' and 'pop' tables
    scales: The scales to generate data for (values >= 1, MONTH_SCALE, or DAY_SCALE)
    maxDisplayedPerUnit: Max number of events to mark as displayable per (scale, unit) pair

    Errors raised by sqlite3 (eg: if the 'dist' or 'event_disp' tables already exist) are propagated.
    The database connection gets closed whether or not an error occurs.
    """
    dbCon = sqlite3.connect(dbFile)
    try:
        dbCur = dbCon.cursor()
        #
        print('Reading through events')
        scaleUnitToCounts: dict[tuple[int, int], list[int]] = {}
            # Maps scale and unit to two counts (num events in that unit, num events displayable for that unit)
            # Only includes events with popularity values
        idScales: set[tuple[int, int]] = set() # Holds (event id, scale) pairs, for events displayable on that scale
        query = 'SELECT events.id, start, fmt FROM events INNER JOIN pop ON events.id = pop.id ORDER BY pop.pop DESC'
        for iterNum, (eventId, eventStart, fmt) in enumerate(dbCur.execute(query), 1):
            if iterNum % 1000 == 0:
                print(f'At iteration {iterNum}')
            # For each scale
            for scale in scales:
                unit = _getUnit(eventStart, fmt, scale)
                # Update maps
                counts = scaleUnitToCounts.setdefault((scale, unit), [0, 0])
                counts[0] += 1
                if counts[1] < maxDisplayedPerUnit:
                    # Events are visited most-popular first, so the earliest ones get the display slots
                    counts[1] += 1
                    idScales.add((eventId, scale))
        #
        print('Writing to db')
        dbCur.execute('CREATE TABLE dist (scale INT, unit INT, count INT, PRIMARY KEY (scale, unit))')
        dbCur.executemany(
            'INSERT INTO dist VALUES (?, ?, ?)',
            ((scale, unit, count) for (scale, unit), (count, _) in scaleUnitToCounts.items()))
        dbCur.execute('CREATE TABLE event_disp (id INT, scale INT, PRIMARY KEY (id, scale))')
        dbCur.executemany('INSERT INTO event_disp VALUES (?, ?)', idScales)
        #
        print('Closing db')
        dbCon.commit()
    finally:
        # Release the connection even if reading or writing failed
        dbCon.close()
if __name__ == '__main__':
    # The script takes no options, and only parses arguments to provide -h/--help, and to reject unexpected input
    import argparse
    argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    ).parse_args()
    #
    # Generate data using the module-level defaults
    genData(DB_FILE, SCALES, MAX_DISPLAYED_PER_UNIT)
|