dh-tech · rlskoeser · Feb 12, 2026 · Feb 12, 2026 · Feb 12, 2026 · Feb 12, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Change Log
 
+## 0.7
+
+- Add parsing to Gregorian date converter; supports month names (full or abbreviated)
+  in English, French, German, Spanish, Kinyarwanda, Ganda, and Tigrinya
+- Include Gregorian dates in omnibus parser
+
 ## 0.6
 
 - Experimental omnibus date converter + parser (EDTF, Hebrew, Hijri)

diff --git a/DEVELOPER_NOTES.md b/DEVELOPER_NOTES.md
@@ -88,4 +88,18 @@ pip install -e ".[docs]"
 sphinx-build docs docs/_build
 ```
 
-HTML documentation will be generated in `docs/_build/html`
+HTML documentation will be generated in `docs/_build/html`
+
+
+### Regenerating the multilingual Gregorian month name grammar file
+
+The Gregorian Lark parser includes a script-generated file, which
+populates month names based on a list of language codes using the Babel
+library. To regenerate, run the script with hatch (which should
+be installed globally):
+```sh
+hatch run codegen:generate
+```
+
+When the `.lark` file is modified by the script, it must be committed to git.
+
diff --git a/pyproject.toml b/pyproject.toml
@@ -81,6 +81,12 @@ path = "src/undate/__init__.py"
 [tool.hatch.build.targets.sdist]
 include = ["src/undate/**/*.py", "src/undate/**/*.lark", "tests/**"]
 
+[tool.hatch.envs.codegen]
+dependencies = ["babel"]
+
+[tool.hatch.envs.codegen.scripts]
+generate = "python scripts/generate_gregorian_grammar.py"
+
 [tool.pytest.ini_options]
 pythonpath = "src/"
 markers = [

diff --git a/scripts/generate_gregorian_grammar.py b/scripts/generate_gregorian_grammar.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+"""
+This script generates the gregorian_multilang.lark file
+with month names (full and abbreviated) based on the list of
+target languages.
+
+Run this script with hatch to regenerate the file::
+
+    hatch run codegen:generate
+
+"""
+
+from collections import defaultdict
+import pathlib
+
+from babel.dates import get_month_names
+
+# directory containing the Lark grammar files, located relative to this
+# script: <parent of this script's directory>/src/undate/converters/grammars
+GRAMMAR_DIR_PATH = (
+    pathlib.Path(__file__).parent.parent / "src" / "undate" / "converters" / "grammars"
+)
+# file that is generated (and overwritten) by this script, in that directory
+MONTH_GRAMMAR_FILE = GRAMMAR_DIR_PATH / "gregorian_multilang.lark"
+
+# Include month names in the following languages. Each code is passed to
+# Babel's get_month_names as the locale. Names are collected in list order,
+# so this order determines the order of equal-length names in the output.
+languages = [
+    "en",  # English
+    "es",  # Spanish
+    "fr",  # French
+    "de",  # German
+    "rw",  # Kinyarwanda
+    "lg",  # Ganda
+    "ti",  # Tigrinya
+]
+
+# warning written verbatim at the top of the generated file, as
+# `//`-prefixed comment lines followed by a blank line
+warning_text = """// WARNING: This file is auto-generated. DO NOT EDIT.
+// To regenerate: hatch run codegen:generate
+
+"""
+
+
+def main():
+    # create a dictionary of lists to hold the names for each month
+    all_month_names = defaultdict(list)
+
+    for lang in languages:
+        for width in ["wide", "abbreviated"]:
+            for month_num, month_name in get_month_names(width, locale=lang).items():
+                # some locales use a . on the shortened month; let's ignore that
+                month_name = month_name.strip(".").lower()
+                # In some cases different languages have the same abbreviations;
+                # in some cases, abbreviated and full are the same.
+                # Only add if not already present, to avoid redundancy
+                if month_name not in all_month_names[month_num]:
+                    all_month_names[month_num].append(month_name)
+
+    with MONTH_GRAMMAR_FILE.open("w") as outfile:
+        outfile.write(warning_text)
+
+        # for each numeric month, generate a rule with all variant names:
+        # month_1:  /January|Jan/i
+        for i, names in all_month_names.items():
+            # combine all names in a case-insensitive OR regex
+            # sort shortest variants last to avoid partial matches hitting first
+            or_names = "|".join(sorted(names, key=len, reverse=True))
+            outfile.write(f"month_{i}: /({or_names})/i\n")
+
+    print(
+        f"Successfully regenerated {MONTH_GRAMMAR_FILE.relative_to(pathlib.Path.cwd())}"
+    )
+    print("If the file has changed, make sure to commit the new version.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/undate/converters/calendars/gregorian/__init__.py b/src/undate/converters/calendars/gregorian/__init__.py
@@ -0,0 +1,3 @@
+from undate.converters.calendars.gregorian.converter import GregorianDateConverter
+
+__all__ = ["GregorianDateConverter"]
diff --git a/src/undate/converters/calendars/gregorian.py → ...nverters/calendars/gregorian/converter.py b/src/undate/converters/calendars/gregorian.py → ...nverters/calendars/gregorian/converter.py
@@ -1,6 +1,11 @@
 from calendar import monthrange, isleap
 
+from lark.exceptions import UnexpectedInput
+
+from undate.undate import Undate
 from undate.converters.base import BaseCalendarConverter
+from undate.converters.calendars.gregorian.parser import gregorian_parser
+from undate.converters.calendars.gregorian.transformer import GregorianDateTransformer
 
 
 class GregorianDateConverter(BaseCalendarConverter):
@@ -18,6 +23,9 @@ class GregorianDateConverter(BaseCalendarConverter):
     #: arbitrary known leap year
     LEAP_YEAR: int = 2024
 
+    def __init__(self) -> None:
+        """Set up the transformer used when parsing Gregorian date strings."""
+        # transformer that turns a Gregorian parse tree into an Undate;
+        # used by the parse method
+        self.transformer = GregorianDateTransformer()
+
     def min_month(self) -> int:
         """First month for the Gregorian calendar."""
         return 1
@@ -79,3 +87,25 @@ def to_gregorian(self, year, month, day) -> tuple[int, int, int]:
         a common point of comparison.
         """
         return (year, month, day)
+
+    def parse(self, value: str) -> Undate:
+        """
+        Parse a Gregorian date string of any supported precision in any
+        supported language and return an :class:`~undate.undate.Undate`.
+        The input date string is preserved in the label of the resulting
+        Undate object.
+        """
+        if not value:
+            raise ValueError("Parsing empty string is not supported")
+
+        # parse the input string, then transform to undate object
+        try:
+            # parse the string with our Gregorian date parser
+            parsetree = gregorian_parser.parse(value)
+            # transform the parse tree into an undate object
+            undate_obj = self.transformer.transform(parsetree)
+            # set the original date string as the label
+            undate_obj.label = value
+            return undate_obj
+        except UnexpectedInput as err:
+            raise ValueError(f"Could not parse '{value}' as a Gregorian date") from err
diff --git a/src/undate/converters/calendars/gregorian/parser.py b/src/undate/converters/calendars/gregorian/parser.py
@@ -0,0 +1,10 @@
+from lark import Lark
+
+from undate.converters import GRAMMAR_FILE_PATH
+
+grammar_path = GRAMMAR_FILE_PATH / "gregorian.lark"
+
+# open based on filename to allow relative imports based on grammar file
+gregorian_parser = Lark.open(
+    str(grammar_path), rel_to=__file__, start="gregorian_date", strict=True
+)
diff --git a/src/undate/converters/calendars/gregorian/transformer.py b/src/undate/converters/calendars/gregorian/transformer.py
@@ -0,0 +1,42 @@
+from lark import Transformer, Tree
+
+from undate import Undate, Calendar
+
+
+class GregorianDateTransformer(Transformer):
+    """Transform a Gregorian date parse tree and return an Undate."""
+
+    # Currently parser should not result in intervals
+
+    calendar = Calendar.GREGORIAN
+
+    def gregorian_date(self, items):
+        parts = {}
+        for child in items:
+            if child.data in ["year", "month", "day"]:
+                # in each case we expect one integer value;
+                # anonymous tokens convert to their value and cast as int
+                value = int(child.children[0])
+                parts[str(child.data)] = value
+
+        # initialize and return an undate with year, month, day and
+        # Gregorian calendar
+        return Undate(**parts, calendar=self.calendar)
+
+    def year(self, items):
+        # combine multiple parts into a single string
+        value = "".join([str(i) for i in items])
+        return Tree(data="year", children=[value])
+
+    def month(self, items):
+        # month has a nested tree for the rule and the value
+        # the name of the rule (month_1, month_2, etc) gives us the
+        # number of the month needed for converting the date
+        tree = items[0]
+        month_n = tree.data.split("_")[-1]
+        return Tree(data="month", children=[month_n])
+
+    def day(self, items):
+        # combine multiple parts into a single string
+        value = "".join([str(i) for i in items])
+        return Tree(data="day", children=[value])
diff --git a/src/undate/converters/calendars/hebrew/converter.py b/src/undate/converters/calendars/hebrew/converter.py
@@ -1,7 +1,7 @@
 from typing import Union
 
 from convertdate import hebrew  # type: ignore
-from lark.exceptions import UnexpectedCharacters
+from lark.exceptions import UnexpectedInput
 
 from undate import Undate, UndateInterval
 from undate.converters.base import BaseCalendarConverter
@@ -111,7 +111,7 @@ def parse(self, value: str) -> Union[Undate, UndateInterval]:
             # set the original date as a label, with the calendar name
             undate_obj.label = f"{value} {self.calendar_name}"
             return undate_obj
-        except UnexpectedCharacters as err:
+        except UnexpectedInput as err:
             raise ValueError(f"Could not parse '{value}' as a Hebrew date") from err
 
     # do we need to support conversion the other direction?

diff --git a/src/undate/converters/calendars/hebrew/parser.py b/src/undate/converters/calendars/hebrew/parser.py
@@ -4,6 +4,7 @@
 
 grammar_path = GRAMMAR_FILE_PATH / "hebrew.lark"
 
-with open(grammar_path) as grammar:
-    # NOTE: LALR parser is faster but can't be used to ambiguity between years and dates
-    hebrew_parser = Lark(grammar.read(), start="hebrew_date", strict=True)
+# open based on filename to allow relative imports based on grammar file
+hebrew_parser = Lark.open(
+    str(grammar_path), rel_to=__file__, start="hebrew_date", strict=True
+)
diff --git a/src/undate/converters/calendars/islamic/converter.py b/src/undate/converters/calendars/islamic/converter.py
@@ -1,7 +1,7 @@
 from typing import Union
 
 from convertdate import islamic  # type: ignore
-from lark.exceptions import UnexpectedCharacters
+from lark.exceptions import UnexpectedInput
 
 from undate import Undate, UndateInterval
 from undate.converters.base import BaseCalendarConverter
@@ -97,7 +97,7 @@ def parse(self, value: str) -> Union[Undate, UndateInterval]:
             # set the original date as a label, with the calendar name
             undate_obj.label = f"{value} {self.calendar_name}"
             return undate_obj
-        except UnexpectedCharacters as err:
+        except UnexpectedInput as err:
             raise ValueError(f"Could not parse '{value}' as an Islamic date") from err
 
     # do we need to support conversion the other direction?

diff --git a/src/undate/converters/calendars/islamic/parser.py b/src/undate/converters/calendars/islamic/parser.py
@@ -4,6 +4,7 @@
 
 grammar_path = GRAMMAR_FILE_PATH / "islamic.lark"
 
-with open(grammar_path) as grammar:
-    # NOTE: LALR parser is faster but can't be used due to ambiguity between years and days
-    islamic_parser = Lark(grammar.read(), start="islamic_date", strict=True)
+# open based on filename to allow relative imports based on grammar file
+islamic_parser = Lark.open(
+    str(grammar_path), rel_to=__file__, start="islamic_date", strict=True
+)
diff --git a/src/undate/converters/combined.py b/src/undate/converters/combined.py
@@ -7,12 +7,13 @@
 from typing import Union
 
 from lark import Lark
-from lark.exceptions import UnexpectedCharacters
+from lark.exceptions import UnexpectedInput
 from lark.visitors import Transformer, merge_transformers
 
 from undate import Undate, UndateInterval
 from undate.converters import BaseDateConverter, GRAMMAR_FILE_PATH
 from undate.converters.edtf.transformer import EDTFTransformer
+from undate.converters.calendars.gregorian.transformer import GregorianDateTransformer
 from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer
 from undate.converters.calendars.islamic.transformer import IslamicDateTransformer
 
@@ -33,6 +34,7 @@ def start(self, children):
     edtf=EDTFTransformer(),
     hebrew=HebrewDateTransformer(),
     islamic=IslamicDateTransformer(),
+    gregorian=GregorianDateTransformer(),
 )
 
 
@@ -45,7 +47,7 @@ def start(self, children):
 class OmnibusDateConverter(BaseDateConverter):
     """
     Combination parser that aggregates existing parser grammars.
-    Currently supports EDTF, Hebrew, and Hijri  where dates are unambiguous.
+    Currently supports EDTF, Gregorian, Hebrew, and Hijri where dates are unambiguous.
     (Year-only dates are parsed as EDTF in Gregorian calendar.)
 
     Does not support serialization.
@@ -75,7 +77,7 @@ def parse(self, value: str) -> Union[Undate, UndateInterval]:
             parsetree = parser.parse(value)
             # transform returns a list; we want the first item in the list
             return self.transformer.transform(parsetree)[0]
-        except UnexpectedCharacters:
+        except UnexpectedInput:
             raise ValueError(
                 "Parsing failed: '%s' is not in a recognized date format" % value
             )

diff --git a/src/undate/converters/edtf/converter.py b/src/undate/converters/edtf/converter.py
@@ -1,6 +1,6 @@
 from typing import Optional, Union
 
-from lark.exceptions import UnexpectedCharacters
+from lark.exceptions import UnexpectedInput
 
 from undate import Undate, UndateInterval
 from undate.converters.base import BaseDateConverter
@@ -40,10 +40,10 @@ def parse(self, value: str) -> Union[Undate, UndateInterval]:
         try:
             parsetree = edtf_parser.parse(value)
             return self.transformer.transform(parsetree)
-        except UnexpectedCharacters:
+        except UnexpectedInput as err:
             raise ValueError(
-                "Parsing failed: '%s' is not a supported EDTF date format" % value
-            )
+                f"Parsing failed: '{value}' is not a supported EDTF date format"
+            ) from err
 
     def _convert_missing_digits(
         self, value: Optional[str], old_missing_digit: str

diff --git a/src/undate/converters/grammars/combined.lark b/src/undate/converters/grammars/combined.lark
@@ -1,7 +1,11 @@
 %import common.WS
 %ignore WS
 
-start: (edtf__start | hebrew__hebrew_date  | islamic__islamic_date )
+// Ignore periods and commas in dates
+%import .undate_common.DATE_PUNCTUATION
+%ignore DATE_PUNCTUATION
+
+start: (edtf__start | hebrew__hebrew_date  | islamic__islamic_date | gregorian__gregorian_date )
 
 // Renaming of the import variables is required, as they receive the namespace of this file.
 // See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565
@@ -23,10 +27,17 @@ start: (edtf__start | hebrew__hebrew_date  | islamic__islamic_date )
 %import .islamic.month -> islamic__month
 %import .islamic.year -> islamic__year
 
+// gregorian calendar, in multiple languages
+%import .gregorian.gregorian_date -> gregorian__gregorian_date
+
 
 // override hebrew date to omit year-only, since year without calendar is ambiguous
 // NOTE: potentially support year with calendar label
-%override hebrew__hebrew_date: hebrew__day hebrew__month hebrew__year | hebrew__month hebrew__year 
+%override hebrew__hebrew_date: hebrew__day hebrew__month hebrew__year | hebrew__month hebrew__year
 
 // same for islamic date, year alone is ambiguous
-%override islamic__islamic_date: islamic__day islamic__month islamic__year | islamic__month islamic__year 
+%override islamic__islamic_date: islamic__day islamic__month islamic__year | islamic__month islamic__year
+
+// same as above. omit year only, since covered by EDTF
+// %override gregorian__gregorian_date: day month year | month day year | year month day | month year | year month |  day month | month day
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from undate.converters.calendars.gregorian.converter import GregorianDateConverter

		__all__ = ["GregorianDateConverter"]