aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Terry Truong <terry06890@gmail.com> 2022-10-04 23:13:36 +1100
committer Terry Truong <terry06890@gmail.com> 2022-10-04 23:13:36 +1100
commit 07b7ef49b07242014f288652980f5b15bfc087f1 (patch)
tree df50f2e7da51582615363e804f7b2185c22b800d
parent 50fbff73a288f08b7027648ed4d50b2c02061f4c (diff)
Replace jdcal package with cal.py
-rw-r--r-- backend/hist_data/README.md 1
-rw-r--r-- backend/hist_data/cal.py 71
-rwxr-xr-x backend/hist_data/gen_events_data.py 23
-rw-r--r-- backend/tests/test_cal.py 29
4 files changed, 112 insertions, 12 deletions
diff --git a/backend/hist_data/README.md b/backend/hist_data/README.md
index 7653f09..517259c 100644
--- a/backend/hist_data/README.md
+++ b/backend/hist_data/README.md
@@ -35,7 +35,6 @@ This directory holds files used to generate the history database data.db.
## Environment
Some of the scripts use third-party packages:
-- `jdcal`: For date conversion
- `indexed_bzip2`: For parallelised bzip2 processing
- `mwxml`, `mwparserfromhell`: For parsing Wikipedia dumps
- `requests`: For downloading data
diff --git a/backend/hist_data/cal.py b/backend/hist_data/cal.py
new file mode 100644
index 0000000..c0e8772
--- /dev/null
+++ b/backend/hist_data/cal.py
@@ -0,0 +1,71 @@
+"""
+Provides functions for converting between Julian calendar, Gregorian calendar,
+and Julian day number values. Algorithms were obtained from
+https://en.wikipedia.org/wiki/Julian_day#Converting_Gregorian_calendar_date_to_Julian_Day_Number.
+"""
+
+def gregorianToJdn(year: int, month: int, day: int) -> int:
+    """
+    Converts a proleptic Gregorian calendar date to a Julian day number,
+    denoting the noon-to-noon 'Julian day' that starts within the input day.
+
+    A year of 1 means 1 CE, and -1 means 1 BC (0 is treated like -1).
+    A month of 1 means January. Can use a month of 13 and a day of 0
+    (eg: month+1 with a day of 0 denotes the last day of a month).
+    Uses exact integer arithmetic, so arbitrarily large years are supported.
+    Dates before 24th Nov 4714 BC produce negative results.
+    """
+    if year < 0:
+        year += 1 # Switch to astronomical year numbering (1 BC is year 0)
+    # January and February are counted as part of the preceding year
+    x = -1 if month <= 2 else 0
+    # Floor division keeps each term exact, and consistent for negative years
+    jdn = 1461 * (year + 4800 + x) // 4
+    jdn += 367 * (month - 2 - 12 * x) // 12
+    jdn -= 3 * ((year + 4900 + x) // 100) // 4
+    jdn += day - 32075
+    return jdn
+
+def julianToJdn(year: int, month: int, day: int) -> int:
+    """
+    Converts a proleptic Julian calendar date to a Julian day number
+    (the Julian calendar counterpart of gregorianToJdn()).
+
+    A year of 1 means 1 CE, and -1 means 1 BC (0 is treated like -1).
+    A month of 1 means January. Can use a month of 13 and a day of 0.
+    Uses exact integer arithmetic, so arbitrarily large years are supported.
+    Dates before 1st Jan 4713 BC produce negative results.
+    """
+    if year < 0:
+        year += 1 # Switch to astronomical year numbering (1 BC is year 0)
+    # January and February are counted as part of the preceding year
+    x = -1 if month <= 2 else 0
+    # Floor division keeps each term exact, and consistent for negative years
+    jdn = 367 * year
+    jdn -= 7 * (year + 5001 + x) // 4
+    jdn += 275 * month // 9
+    jdn += day + 1729777
+    return jdn
+
+def jdnToGregorian(jdn: int) -> tuple[int, int, int]:
+    """
+    Converts a Julian day number to a (year, month, day) Gregorian calendar
+    date: the day in which the given noon-to-noon 'Julian day' begins.
+    Years before 1 CE are returned as negative numbers (-1 means 1 BC),
+    and a year of 0 is never returned.
+    Valid for non-negative input.
+    """
+    # Correction for the century leap days that the Gregorian calendar omits
+    centuryShift = ((4 * jdn + 274277) // 146097) * 3 // 4 - 38
+    e = 4 * (jdn + 1401 + centuryShift) + 3
+    yearPart, yearRem = divmod(e, 1461)
+    monthPart, monthRem = divmod(5 * (yearRem // 4) + 2, 153)
+    day = monthRem // 5 + 1
+    month = (monthPart + 2) % 12 + 1
+    year = yearPart - 4716 + (14 - month) // 12
+    # There is no year 0, so shift computed years <= 0 down by one
+    return (year - 1 if year <= 0 else year), month, day
+
+def jdnToJulian(jdn: int) -> tuple[int, int, int]:
+    """
+    Like jdnToGregorian(), but converts to a Julian calendar date.
+    Returns a (year, month, day) tuple, where years before 1 CE are
+    negative (-1 means 1 BC), and a year of 0 is never returned.
+    """
+    e = 4 * (jdn + 1401) + 3
+    yearPart, yearRem = divmod(e, 1461)
+    monthPart, monthRem = divmod(5 * (yearRem // 4) + 2, 153)
+    day = monthRem // 5 + 1
+    month = (monthPart + 2) % 12 + 1
+    year = yearPart - 4716 + (14 - month) // 12
+    # There is no year 0, so shift computed years <= 0 down by one
+    return (year - 1 if year <= 0 else year), month, day
+
+def julianToGregorian(year: int, month: int, day: int) -> tuple[int, int, int]:
+    """ Converts a Julian calendar date to the Gregorian calendar date with the same Julian day number """
+    jdn = julianToJdn(year, month, day)
+    return jdnToGregorian(jdn)
+
+def gregorianToJulian(year: int, month: int, day: int) -> tuple[int, int, int]:
+    """ Converts a Gregorian calendar date to the Julian calendar date with the same Julian day number """
+    jdn = gregorianToJdn(year, month, day)
+    return jdnToJulian(jdn)
diff --git a/backend/hist_data/gen_events_data.py b/backend/hist_data/gen_events_data.py
index d1038ce..f054f76 100755
--- a/backend/hist_data/gen_events_data.py
+++ b/backend/hist_data/gen_events_data.py
@@ -59,10 +59,16 @@ Info about objects with type 'quantity' can be found at: https://www.wikidata.or
# - Using pool.map() instead of pool.imap_unordered(), which seems to hang in some cases (was using python 3.8).
# Possibly related: https://github.com/python/cpython/issues/72882
-import os, io, math, re, argparse
+# Let imports of modules within this directory resolve when this script is loaded by unit testing code
+import os, sys
+parentDir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(parentDir)
+
+import io, math, re, argparse
import bz2, json, sqlite3
import multiprocessing, indexed_bzip2, pickle, tempfile
-from jdcal import gcal2jd, jcal2jd
+# Modules in this directory
+from cal import gregorianToJdn, julianToJdn
WIKIDATA_FILE = os.path.join('wikidata', 'latest-all.json.bz2')
DUMP_YEAR = 2022 # Used for converting 'age' values into dates
@@ -382,18 +388,16 @@ def getEventTime(dataVal) -> tuple[int, int | None, int] | None:
if year < -4713: # If before 4713 BCE (start of valid julian date period)
print(f'WARNING: Skipping sub-year-precision date before 4713 BCE: {json.dumps(dataVal)}')
return None
- if year < 0:
- year += 1 # Adjust for 'jdcal' treating year 0 as 1 BCE, year -1 as 2 BCE, etc
day = max(day, 1) # With month-precision, entry may have a 'day' of 0
if calendarmodel == 'http://www.wikidata.org/entity/Q1985727': # 'proleptic gregorian calendar'
- start = jdPairToJd(gcal2jd(year, month, day))
+ start = gregorianToJdn(year, month, day)
if precision == 10:
- startUpper = jdPairToJd(gcal2jd(year, month+1, 0))
+ startUpper = gregorianToJdn(year, month+1, 0)
timeFmt = 2
else: # "http://www.wikidata.org/entity/Q1985786" ('proleptic julian calendar')
- start = jdPairToJd(jcal2jd(year, month, day))
+ start = julianToJdn(year, month, day)
if precision == 10:
- startUpper = jdPairToJd(jcal2jd(year, month+1, 0))
+ startUpper = julianToJdn(year, month+1, 0)
timeFmt = 1
elif 0 <= precision < 10: # 'year' to 'gigaannum' precision
scale: int = 10 ** (9 - precision)
@@ -408,9 +412,6 @@ def getEventTime(dataVal) -> tuple[int, int | None, int] | None:
else:
return None
return start, startUpper, timeFmt
-def jdPairToJd(jdPair: tuple[int, int]) -> int:
- """ Converts a julian-date-representing value from jdcal into an int """
- return math.ceil(sum(jdPair))
# For using multiple processes
def readDumpChunkOneParam(params: tuple[int, str, str, str, int, int]) -> str:
diff --git a/backend/tests/test_cal.py b/backend/tests/test_cal.py
new file mode 100644
index 0000000..7f2aa41
--- /dev/null
+++ b/backend/tests/test_cal.py
@@ -0,0 +1,29 @@
+import unittest
+
+from hist_data.cal import \
+ gregorianToJdn, julianToJdn, jdnToGregorian, jdnToJulian, \
+ julianToGregorian, gregorianToJulian
+
+class TestCal(unittest.TestCase):
+    """ Checks the calendar conversion functions against known date and Julian day number pairs """
+    def test_gregorian_to_jdn(self):
+        # Each entry pairs a (year, month, day) date with its expected Julian day number
+        for date, jdn in [((2010, 11, 3), 2455504), ((-4714, 11, 24), 0), ((-1, 1, 1), 1721060)]:
+            self.assertEqual(gregorianToJdn(*date), jdn)
+    def test_julian_to_jdn(self):
+        for date, jdn in [((2010, 11, 3), 2455517), ((-4713, 1, 1), 0), ((-1, 1, 1), 1721058)]:
+            self.assertEqual(julianToJdn(*date), jdn)
+    def test_jdn_to_gregorian(self):
+        for jdn, date in [(2455504, (2010, 11, 3)), (0, (-4714, 11, 24)), (1721060, (-1, 1, 1))]:
+            self.assertEqual(jdnToGregorian(jdn), date)
+    def test_jdn_to_julian(self):
+        for jdn, date in [(2455517, (2010, 11, 3)), (0, (-4713, 1, 1)), (1721058, (-1, 1, 1))]:
+            self.assertEqual(jdnToJulian(jdn), date)
+    def test_gregorian_to_julian(self):
+        # Each entry pairs a Gregorian date with the equivalent Julian date
+        for gregorian, julian in [((2022, 9, 30), (2022, 9, 17)), ((1616, 5, 3), (1616, 4, 23))]:
+            self.assertEqual(gregorianToJulian(*gregorian), julian)
+    def test_julian_to_gregorian(self):
+        # Each entry pairs a Julian date with the equivalent Gregorian date
+        for julian, gregorian in [((2022, 9, 17), (2022, 9, 30)), ((1616, 4, 23), (1616, 5, 3))]:
+            self.assertEqual(julianToGregorian(*julian), gregorian)