aboutsummaryrefslogtreecommitdiff
path: root/backend/hist_data/reduce_event_data.py
blob: 5801f4d6354454868bb850dce43b16f76bb4204f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/python3

"""
Delete events from the database that have no image
"""

# Code used in unit testing (for resolving imports of modules within this directory)
import os, sys
parentDir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(parentDir)
# Standard imports
import argparse
import sqlite3
# Local imports
from cal import SCALES, dbDateToHistDate, dateToUnit

DB_FILE = 'data.db'

def reduceData(dbFile: str, scales: list[int]) -> None:
	"""
	Delete events that have no image, and adjust dependent tables to match.

	Events with no matching row in 'event_imgs' are removed from the 'events', 'pop', and
	'event_disp' tables. For every scale in 'scales', the 'dist' count for the unit containing
	each deleted event is decreased, and 'dist' rows whose count falls below 1 are removed.

	All changes are committed together at the end. The connection is always closed, and if an
	error occurs beforehand it is closed without committing.

	dbFile: Path of the sqlite database to modify
	scales: Scales for which 'dist' counts should be adjusted
	"""
	progressInterval = 100_000 # Number of rows between progress messages
	dbCon = sqlite3.connect(dbFile)
	try:
		dbCur = dbCon.cursor()
		#
		print('Getting events to delete')
		eventsToDel: list[int] = []
		scaleUnitToDelCount: dict[tuple[int, int], int] = {} # Stores counts to subtract from entries in 'dist'
		query = 'SELECT events.id, events.start, events.fmt FROM events' \
			' LEFT JOIN event_imgs ON events.id = event_imgs.id WHERE event_imgs.id IS NULL'
		# Count rows from 1, so a message is only printed once per 'progressInterval' rows
		for iterNum, (eventId, start, fmt) in enumerate(dbCur.execute(query), start=1):
			if iterNum % progressInterval == 0:
				print(f'At iteration {iterNum}')
			#
			eventsToDel.append(eventId)
			date = dbDateToHistDate(start, fmt)
			for scale in scales:
				key = (scale, dateToUnit(date, scale))
				scaleUnitToDelCount[key] = scaleUnitToDelCount.get(key, 0) + 1
		print(f'Found {len(eventsToDel)}')
		#
		print('Deleting events')
		for iterNum, eventId in enumerate(eventsToDel, start=1):
			if iterNum % progressInterval == 0:
				print(f'At iteration {iterNum}')
			#
			dbCur.execute('DELETE FROM events WHERE id = ?', (eventId,))
			dbCur.execute('DELETE FROM pop WHERE id = ?', (eventId,))
			dbCur.execute('DELETE FROM event_disp WHERE id = ?', (eventId,))
		for (scale, unit), delCount in scaleUnitToDelCount.items():
			dbCur.execute('UPDATE dist SET count = count - ? WHERE scale = ? AND unit = ?', (delCount, scale, unit))
		# Remove units that no longer contain any events
		dbCur.execute('DELETE FROM dist WHERE count < 1')
		#
		dbCon.commit()
	finally:
		# Close even on failure, so the connection is not leaked
		dbCon.close()

if __name__ == '__main__':
	# No options are defined; parsing the arguments just provides -h/--help and rejects unknown arguments
	argparse.ArgumentParser(
		description=__doc__,
		formatter_class=argparse.RawDescriptionHelpFormatter,
	).parse_args()
	# Prune image-less events from the default database, for all scales
	reduceData(DB_FILE, SCALES)