diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-10-04 23:13:36 +1100 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-10-04 23:13:36 +1100 |
| commit | 07b7ef49b07242014f288652980f5b15bfc087f1 (patch) | |
| tree | df50f2e7da51582615363e804f7b2185c22b800d | |
| parent | 50fbff73a288f08b7027648ed4d50b2c02061f4c (diff) | |
Replace jdcal package with cal.py
| -rw-r--r-- | backend/hist_data/README.md | 1 | ||||
| -rw-r--r-- | backend/hist_data/cal.py | 71 | ||||
| -rwxr-xr-x | backend/hist_data/gen_events_data.py | 23 | ||||
| -rw-r--r-- | backend/tests/test_cal.py | 29 |
4 files changed, 112 insertions, 12 deletions
diff --git a/backend/hist_data/README.md b/backend/hist_data/README.md index 7653f09..517259c 100644 --- a/backend/hist_data/README.md +++ b/backend/hist_data/README.md @@ -35,7 +35,6 @@ This directory holds files used to generate the history database data.db. ## Environment Some of the scripts use third-party packages: -- `jdcal`: For date conversion - `indexed_bzip2`: For parallelised bzip2 processing - `mwxml`, `mwparserfromhell`: For parsing Wikipedia dumps - `requests`: For downloading data diff --git a/backend/hist_data/cal.py b/backend/hist_data/cal.py new file mode 100644 index 0000000..c0e8772 --- /dev/null +++ b/backend/hist_data/cal.py @@ -0,0 +1,71 @@ +""" +Provides functions for converting between Julian calendar, Gregorian calendar, +and Julian day number values. Algorithms were obtained from +https://en.wikipedia.org/wiki/Julian_day#Converting_Gregorian_calendar_date_to_Julian_Day_Number. +""" + +def gregorianToJdn(year: int, month: int, day: int) -> int: + """ + Converts a Gregorian calendar date to a Julian day number, + denoting the noon-to-noon 'Julian day' that starts within the input day. + A year of 1 means 1 CE, and -1 means 1 BC (0 is treated like -1). + A month of 1 means January. Can use a month of 13 and a day of 0. + Valid for dates from 24th Nov 4714 BC onwards. + """ + if year < 0: + year += 1 + x = int((month - 14) / 12) + jdn = int(1461 * (year + 4800 + x) / 4) + jdn += int((367 * (month - 2 - 12 * x)) / 12) + jdn -= int((3 * int((year + 4900 + x) / 100)) / 4) + jdn += day - 32075 + return jdn + +def julianToJdn(year: int, month: int, day: int) -> int: + """ + Like gregorianToJdn(), but converts a Julian calendar date. + Valid for dates from 1st Jan 4713 BC onwards. + """ + if year < 0: + year += 1 + jdn = 367 * year + jdn -= int(7 * (year + 5001 + int((month - 9) / 7)) / 4) + jdn += int(275 * month / 9) + jdn += day + 1729777 + return jdn + +def jdnToGregorian(jdn: int) -> tuple[int, int, int]: + """ + Converts a Julian day number to a Gregorian calendar date, denoting the + day in which the given noon-to-noon 'Julian day' begins. + Valid for non-negative input. + """ + f = jdn + 1401 + (((4 * jdn + 274277) // 146097) * 3) // 4 - 38 + e = 4 * f + 3 + g = (e % 1461) // 4 + h = 5 * g + 2 + D = (h % 153) // 5 + 1 + M = (h // 153 + 2) % 12 + 1 + Y = (e // 1461) - 4716 + (12 + 2 - M) // 12 + if Y <= 0: + Y -= 1 + return Y, M, D + +def jdnToJulian(jdn: int) -> tuple[int, int, int]: + """ Like jdnToGregorian(), but converts to a Julian calendar date """ + f = jdn + 1401 + e = 4 * f + 3 + g = (e % 1461) // 4 + h = 5 * g + 2 + D = (h % 153) // 5 + 1 + M = (h // 153 + 2) % 12 + 1 + Y = (e // 1461) - 4716 + (12 + 2 - M) // 12 + if Y <= 0: + Y -= 1 + return Y, M, D + +def julianToGregorian(year: int, month: int, day: int) -> tuple[int, int, int]: + return jdnToGregorian(julianToJdn(year, month, day)) + +def gregorianToJulian(year: int, month: int, day: int) -> tuple[int, int, int]: + return jdnToJulian(gregorianToJdn(year, month, day)) diff --git a/backend/hist_data/gen_events_data.py b/backend/hist_data/gen_events_data.py index d1038ce..f054f76 100755 --- a/backend/hist_data/gen_events_data.py +++ b/backend/hist_data/gen_events_data.py @@ -59,10 +59,16 @@ Info about objects with type 'quantity' can be found at: https://www.wikidata.or # - Using pool.map() instead of pool.imap_unordered(), which seems to hang in some cases (was using python 3.8). # Possibly related: https://github.com/python/cpython/issues/72882 -import os, io, math, re, argparse +# Enable unit testing code to, when running this script, resolve imports of modules within this directory +import os, sys +parentDir = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(parentDir) + +import io, math, re, argparse import bz2, json, sqlite3 import multiprocessing, indexed_bzip2, pickle, tempfile -from jdcal import gcal2jd, jcal2jd +# Modules in this directory +from cal import gregorianToJdn, julianToJdn WIKIDATA_FILE = os.path.join('wikidata', 'latest-all.json.bz2') DUMP_YEAR = 2022 # Used for converting 'age' values into dates @@ -382,18 +388,16 @@ def getEventTime(dataVal) -> tuple[int, int | None, int] | None: if year < -4713: # If before 4713 BCE (start of valid julian date period) print(f'WARNING: Skipping sub-year-precision date before 4713 BCE: {json.dumps(dataVal)}') return None - if year < 0: - year += 1 # Adjust for 'jdcal' treating year 0 as 1 BCE, year -1 as 2 BCE, etc day = max(day, 1) # With month-precision, entry may have a 'day' of 0 if calendarmodel == 'http://www.wikidata.org/entity/Q1985727': # 'proleptic gregorian calendar' - start = jdPairToJd(gcal2jd(year, month, day)) + start = gregorianToJdn(year, month, day) if precision == 10: - startUpper = jdPairToJd(gcal2jd(year, month+1, 0)) + startUpper = gregorianToJdn(year, month+1, 0) timeFmt = 2 else: # "http://www.wikidata.org/entity/Q1985786" ('proleptic julian calendar') - start = jdPairToJd(jcal2jd(year, month, day)) + start = julianToJdn(year, month, day) if precision == 10: - startUpper = jdPairToJd(jcal2jd(year, month+1, 0)) + startUpper = julianToJdn(year, month+1, 0) timeFmt = 1 elif 0 <= precision < 10: # 'year' to 'gigaannum' precision scale: int = 10 ** (9 - precision) @@ -408,9 +412,6 @@ def getEventTime(dataVal) -> tuple[int, int | None, int] | None: else: return None return start, startUpper, timeFmt -def jdPairToJd(jdPair: tuple[int, int]) -> int: - """ Converts a julian-date-representing value from jdcal into an int """ - return math.ceil(sum(jdPair)) # For using multiple processes def readDumpChunkOneParam(params: tuple[int, str, str, str, int, int]) -> str: diff --git a/backend/tests/test_cal.py b/backend/tests/test_cal.py new file mode 100644 index 0000000..7f2aa41 --- /dev/null +++ b/backend/tests/test_cal.py @@ -0,0 +1,29 @@ +import unittest + +from hist_data.cal import \ + gregorianToJdn, julianToJdn, jdnToGregorian, jdnToJulian, \ + julianToGregorian, gregorianToJulian + +class TestCal(unittest.TestCase): + def test_gregorian_to_jdn(self): + self.assertEqual(gregorianToJdn(2010, 11, 3), 2455504) + self.assertEqual(gregorianToJdn(-4714, 11, 24), 0) + self.assertEqual(gregorianToJdn(-1, 1, 1), 1721060) + def test_julian_to_jdn(self): + self.assertEqual(julianToJdn(2010, 11, 3), 2455517) + self.assertEqual(julianToJdn(-4713, 1, 1), 0) + self.assertEqual(julianToJdn(-1, 1, 1), 1721058) + def test_jdn_to_gregorian(self): + self.assertEqual(jdnToGregorian(2455504), (2010, 11, 3)) + self.assertEqual(jdnToGregorian(0), (-4714, 11, 24)) + self.assertEqual(jdnToGregorian(1721060), (-1, 1, 1)) + def test_jdn_to_julian(self): + self.assertEqual(jdnToJulian(2455517), (2010, 11, 3)) + self.assertEqual(jdnToJulian(0), (-4713, 1, 1)) + self.assertEqual(jdnToJulian(1721058), (-1, 1, 1)) + def test_gregorian_to_julian(self): + self.assertEqual(gregorianToJulian(2022, 9, 30), (2022, 9, 17)) + self.assertEqual(gregorianToJulian(1616, 5, 3), (1616, 4, 23)) + def test_julian_to_gregorian(self): + self.assertEqual(julianToGregorian(2022, 9, 17), (2022, 9, 30)) + self.assertEqual(julianToGregorian(1616, 4, 23), (1616, 5, 3)) |
