aboutsummaryrefslogtreecommitdiff
path: root/backend/hist_data/gen_disp_data.py
blob: ca9b172bb0a0c9b0cad99a410943894aaa14f8f5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/python3

"""
Adds data about event distribution to the database,
and removes events not eligible for display
"""

import argparse
import os
import sqlite3

from .cal import SCALES, dbDateToHistDate, dateToUnit

MAX_DISPLAYED_PER_UNIT = 4

DATA_DIR = os.path.dirname(os.path.realpath(__file__))
DB_FILE = os.path.join(DATA_DIR, 'data.db')

def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int, forImageTables: bool) -> None:
	dbCon = sqlite3.connect(dbFile)
	dbCur = dbCon.cursor()

	print('Reading through events')
	scaleUnitToCounts: dict[tuple[int, int], list[int]] = {}
		# Maps scale and unit to two counts (num events in that unit, num events displayable for that unit)
		# Only includes events with popularity values
	idScales: dict[int, list[tuple[int, int]]] = {} # Maps event ids to scales+units they are displayable on
	iterNum = 0
	query = 'SELECT events.id, start, fmt FROM events INNER JOIN pop ON events.id = pop.id' \
		+ ('' if not forImageTables else ' INNER JOIN event_imgs ON events.id = event_imgs.id') \
		+ ' ORDER BY pop.pop DESC'
	for eventId, eventStart, fmt in dbCur.execute(query):
		iterNum += 1
		if iterNum % 1e5 == 0:
			print(f'At iteration {iterNum}')

		for scale in scales:
			unit = dateToUnit(dbDateToHistDate(eventStart, fmt), scale)
			# Update maps
			counts: list[int]
			if (scale, unit) in scaleUnitToCounts:
				counts = scaleUnitToCounts[(scale, unit)]
				counts[0] += 1
			else:
				counts = [1, 0]
			if counts[1] < maxDisplayedPerUnit:
				counts[1] += 1
				if eventId not in idScales:
					idScales[eventId] = []
				idScales[eventId].append((scale, unit))
			scaleUnitToCounts[(scale, unit)] = counts
	print(f'Results: {len(idScales)} displayable events')

	print('Looking for non-displayable events')
	eventsToDel: list[int] = []
	for eventId, eventStart, fmt in dbCur.execute(query):
		if eventId in idScales:
			continue
		eventsToDel.append(eventId)
		# Remove from data to be added to 'dist'
		for scale in scales:
			unit = dateToUnit(dbDateToHistDate(eventStart, fmt), scale)
			count = scaleUnitToCounts[(scale, unit)][0] - 1
			if count == 0:
				del scaleUnitToCounts[(scale, unit)]
			else:
				scaleUnitToCounts[(scale, unit)][0] = count
	for (eventId,) in dbCur.execute( # Find events without scores
		'SELECT events.id FROM events LEFT JOIN pop ON events.id = pop.id WHERE pop.id IS NULL'):
		eventsToDel.append(eventId)
	print(f'Found {len(eventsToDel)}')

	if not forImageTables:
		print(f'Deleting {len(eventsToDel)} events')
		iterNum = 0
		for eventId in eventsToDel:
			iterNum += 1
			if iterNum % 1e5 == 0:
				print(f'At iteration {iterNum}')
			#
			dbCur.execute('DELETE FROM events WHERE id = ?', (eventId,))
			dbCur.execute('DELETE FROM pop WHERE id = ?', (eventId,))

	print('Writing to db')
	distTable = 'dist' if not forImageTables else 'img_dist'
	dispTable = 'event_disp' if not forImageTables else 'img_disp'
	dbCur.execute(f'CREATE TABLE {distTable} (scale INT, unit INT, count INT, PRIMARY KEY (scale, unit))')
	for (scale, unit), (count, _) in scaleUnitToCounts.items():
		dbCur.execute(f'INSERT INTO {distTable} VALUES (?, ?, ?)', (scale, unit, count))
	dbCur.execute(f'CREATE TABLE {dispTable} (id INT, scale INT, unit INT, PRIMARY KEY (id, scale))')
	dbCur.execute(f'CREATE INDEX {dispTable}_scale_unit_idx ON event_disp(scale, unit)')
	for eventId, scaleUnits in idScales.items():
		for [scale, unit] in scaleUnits:
			dbCur.execute(f'INSERT INTO {dispTable} VALUES (?, ?, ?)', (eventId, scale, unit))

	print('Closing db')
	dbCon.commit()
	dbCon.close()

if __name__ == '__main__':
	parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
	parser.add_argument(
		'type', nargs='?', choices=['event', 'img'], default='event', help='The type of tables to generate')
	args = parser.parse_args()

	genData(DB_FILE, SCALES, MAX_DISPLAYED_PER_UNIT, args.type == 'img')