1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """
25 Date parsing class. Serves as the base class for any localized
26 date parsing class. The default, base class provides parsing for
27 English.
28 """
29
30 __author__ = "Donald N. Allingham"
31 __version__ = "$Revision: 8054 $"
32
33
34
35
36
37
38 import re
39 import calendar
40
41
42
43
44
45
46 import logging
47 log = logging.getLogger(".DateParser")
48
49
50
51
52
53
54 from RelLib import Date, DateError
55 import GrampsLocale
56
57
58
59
60
61
# Number of days in each month, January first, for ordinary and leap years.
_max_days = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
_leap_days = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]


def gregorian_valid(date_tuple):
    """
    Check that the day and month of a date tuple fit the Gregorian calendar.

    date_tuple is indexed as (day, month, year, ...). The date is valid when
    the month is not greater than 12 and the day does not exceed the length
    of that month, February having 29 days in leap years.

    Returns True for a valid date and False otherwise. Values that cannot
    be compared, or that cannot be used to look up the month length, make
    the date invalid instead of raising.
    """
    day = date_tuple[0]
    month = date_tuple[1]
    try:
        if month > 12:
            return False
        # Only the February entry differs between the two tables.
        if calendar.isleap(date_tuple[2]):
            month_lengths = _leap_days
        else:
            month_lengths = _max_days
        # A month of 0 indexes the last entry (31 days), so a zero month
        # together with a zero day is accepted.
        return not day > month_lengths[month - 1]
    except (TypeError, ValueError, LookupError):
        # Only data errors are treated as "invalid"; interrupts and other
        # unrelated exceptions are no longer swallowed.
        return False
80
81
82
83
84
85
class DateParser:
    """
    Converts a text string into a Date object. If the date cannot be
    converted, the text string is assigned.

    The class-level tables below map the words recognised in a date string
    (month names, modifiers, qualities, calendar names) to their numeric
    values. All keys are lower case.
    """

    # Captures, in order, the three conversion letters of a "%x...%y...%z"
    # style date format string.
    _fmt_parse = re.compile(r".*%(\S).*%(\S).*%(\S).*")

    # Capitalized English abbreviations used when matching RFC-style dates.
    _rfc_days = ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat')
    _rfc_mons_to_int = {
        'Jan' : 1,  'Feb' : 2,  'Mar' : 3,  'Apr' : 4,
        'May' : 5,  'Jun' : 6,  'Jul' : 7,  'Aug' : 8,
        'Sep' : 9,  'Oct' : 10, 'Nov' : 11, 'Dec' : 12,
        }

    # Month names of the current locale, as provided by GrampsLocale.
    month_to_int = GrampsLocale.month_to_int

    # Modifiers that are written before the date.
    modifier_to_int = {
        'before' : Date.MOD_BEFORE, 'bef'    : Date.MOD_BEFORE,
        'bef.'   : Date.MOD_BEFORE, 'after'  : Date.MOD_AFTER,
        'aft'    : Date.MOD_AFTER,  'aft.'   : Date.MOD_AFTER,
        'about'  : Date.MOD_ABOUT,  'abt.'   : Date.MOD_ABOUT,
        'abt'    : Date.MOD_ABOUT,  'circa'  : Date.MOD_ABOUT,
        'c.'     : Date.MOD_ABOUT,  'around' : Date.MOD_ABOUT,
        }

    # Modifiers that are written after the date; empty in this base class.
    modifier_after_to_int = {}

    hebrew_to_int = {
        "tishri"  : 1,  "heshvan" : 2,  "kislev"  : 3,
        "tevet"   : 4,  "shevat"  : 5,  "adari"   : 6,
        "adarii"  : 7,  "nisan"   : 8,  "iyyar"   : 9,
        "sivan"   : 10, "tammuz"  : 11, "av"      : 12,
        "elul"    : 13,
        }

    french_to_int = {
        u'vendémiaire' : 1,  u'brumaire'  : 2,
        u'frimaire'    : 3,  u'nivôse'    : 4,
        u'pluviôse'    : 5,  u'ventôse'   : 6,
        u'germinal'    : 7,  u'floréal'   : 8,
        u'prairial'    : 9,  u'messidor'  : 10,
        u'thermidor'   : 11, u'fructidor' : 12,
        u'extra'       : 13
        }

    islamic_to_int = {
        "muharram"           : 1,  "muharram ul haram"  : 1,
        "safar"              : 2,  "rabi`al-awwal"      : 3,
        "rabi'l"             : 3,  "rabi`ul-akhir"      : 4,
        "rabi`ath-thani"     : 4,  "rabi` ath-thani"    : 4,
        "rabi`al-thaany"     : 4,  "rabi` al-thaany"    : 4,
        "rabi' ii"           : 4,  "jumada l-ula"       : 5,
        "jumaada-ul-awwal"   : 5,  "jumaada i"          : 5,
        "jumada t-tania"     : 6,  "jumaada-ul-akhir"   : 6,
        # "jumaada ii" is the sixth month, like the other spellings of
        # Jumada II on the two lines around it (it used to be mapped to 5).
        "jumaada al-thaany"  : 6,  "jumaada ii"         : 6,
        "rajab"              : 7,  "sha`ban"            : 8,
        "sha`aban"           : 8,  "ramadan"            : 9,
        "ramadhan"           : 9,  "shawwal"            : 10,
        "dhu l-qa`da"        : 11, "dhu qadah"          : 11,
        "thw al-qi`dah"      : 11, "dhu l-hijja"        : 12,
        "dhu hijja"          : 12, "thw al-hijjah"      : 12,
        }

    persian_to_int = {
        "farvardin" : 1,  "ordibehesht" : 2,
        "khordad"   : 3,  "tir"         : 4,
        "mordad"    : 5,  "shahrivar"   : 6,
        "mehr"      : 7,  "aban"        : 8,
        "azar"      : 9,  "dey"         : 10,
        "bahman"    : 11, "esfand"      : 12,
        }

    # Recognised "before common era" markers.
    bce = ["B.C.E.", "B.C.E", "BCE", "B.C.", "B.C", "BC"]

    calendar_to_int = {
        'gregorian'         : Date.CAL_GREGORIAN,
        'g'                 : Date.CAL_GREGORIAN,
        'julian'            : Date.CAL_JULIAN,
        'j'                 : Date.CAL_JULIAN,
        'hebrew'            : Date.CAL_HEBREW,
        'h'                 : Date.CAL_HEBREW,
        'islamic'           : Date.CAL_ISLAMIC,
        'i'                 : Date.CAL_ISLAMIC,
        'french'            : Date.CAL_FRENCH,
        'french republican' : Date.CAL_FRENCH,
        'f'                 : Date.CAL_FRENCH,
        'persian'           : Date.CAL_PERSIAN,
        'p'                 : Date.CAL_PERSIAN,
        }

    quality_to_int = {
        'estimated'  : Date.QUAL_ESTIMATED,
        'est.'       : Date.QUAL_ESTIMATED,
        'est'        : Date.QUAL_ESTIMATED,
        'calc.'      : Date.QUAL_CALCULATED,
        'calc'       : Date.QUAL_CALCULATED,
        'calculated' : Date.QUAL_CALCULATED,
        }
188
def __init__(self):
    """
    Prepare the parser: compile the regular expressions, build the table
    of per-calendar parsing methods and detect the field order of dates.

    The field order is taken from GrampsLocale.tformat. dmy is set for a
    day-month-year format and ymd for a year-month-day format; with any
    other recognisable order both flags are False. When the format string
    cannot be analysed at all, day-month-year is assumed.
    """
    self.init_strings()

    # Gregorian and Julian dates are written the same way and share a parser.
    self.parser = {
        Date.CAL_GREGORIAN : self._parse_greg_julian,
        Date.CAL_JULIAN    : self._parse_greg_julian,
        Date.CAL_FRENCH    : self._parse_french,
        Date.CAL_PERSIAN   : self._parse_persian,
        Date.CAL_HEBREW    : self._parse_hebrew,
        Date.CAL_ISLAMIC   : self._parse_islamic,
        }

    match = self._fmt_parse.match(GrampsLocale.tformat.lower())
    if match:
        field_order = match.groups()
        self.dmy = (field_order == ('d', 'm', 'y'))
        self.ymd = (field_order == ('y', 'm', 'd'))
    else:
        self.dmy = True
        self.ymd = False
208
def re_longest_first(self, keys):
    """
    Return the string of a regular expression group which matches any of
    the given keys, e.g. '(before|bef\\.|bef)'.

    The keys are ordered so that the longest ones are tried first; keys of
    equal length keep their relative order. Any '.' character is quoted,
    other characters are inserted unchanged.

    keys may be any iterable of strings (a list or the keys of a
    dictionary). It is not modified.
    """
    # sorted() works on a copy, so class-level tables passed in by the
    # caller keep their order, and it accepts dictionary key views.
    longest_first = sorted(keys, key=len, reverse=True)
    return '(' + '|'.join([key.replace('.', r'\.') for key in longest_first]) + ')'
217
def init_strings(self):
    """
    Compile the regular expressions used for matching dates.

    Most of the expressions can stay as they are in most languages; span
    and range will most likely need to change. Whatever change is needed,
    a localized parser may call DateParser.init_strings(self) first, so
    that the invariant expressions do not have to be coded repeatedly, and
    then override whatever differs after that call. See DateParserRU() as
    an example.
    """
    self._rfc_mon_str = '(' + '|'.join(self._rfc_mons_to_int.keys()) + ')'
    self._rfc_day_str = '(' + '|'.join(self._rfc_days) + ')'

    # A fresh list is handed over each time, so the helper is free to
    # reorder it without touching the tables themselves.
    self._bce_str = self.re_longest_first(list(self.bce))
    self._qual_str = self.re_longest_first(list(self.quality_to_int.keys()))
    self._mod_str = self.re_longest_first(list(self.modifier_to_int.keys()))
    self._mod_after_str = self.re_longest_first(
        list(self.modifier_after_to_int.keys()))

    self._mon_str = self.re_longest_first(list(self.month_to_int.keys()))
    self._jmon_str = self.re_longest_first(list(self.hebrew_to_int.keys()))
    self._fmon_str = self.re_longest_first(list(self.french_to_int.keys()))
    self._pmon_str = self.re_longest_first(list(self.persian_to_int.keys()))
    self._imon_str = self.re_longest_first(list(self.islamic_to_int.keys()))
    self._cal_str = self.re_longest_first(list(self.calendar_to_int.keys()))

    # The BCE markers are matched case-sensitively, unlike the rest.
    self._bce_re = re.compile(r"(.*)\s+%s( ?.*)" % self._bce_str)

    self._cal = re.compile(r"(.*)\s+\(%s\)( ?.*)" % self._cal_str,
                           re.IGNORECASE)
    self._qual = re.compile(r"(.* ?)%s\s+(.+)" % self._qual_str,
                            re.IGNORECASE)

    self._span = re.compile(r"(from)\s+(?P<start>.+)\s+to\s+(?P<stop>.+)",
                            re.IGNORECASE)
    # The dot of "bet." is quoted so that it only matches a literal dot.
    self._range = re.compile(
        r"(bet|bet\.|between)\s+(?P<start>.+)\s+and\s+(?P<stop>.+)",
        re.IGNORECASE)
    self._modifier = re.compile(r'%s\s+(.*)' % self._mod_str,
                                re.IGNORECASE)
    self._modifier_after = re.compile(r'(.*)\s+%s' % self._mod_after_str,
                                      re.IGNORECASE)
    self._abt2 = re.compile('<(.*)>', re.IGNORECASE)

    # Textual dates have the same two shapes in every calendar; only the
    # group of month names differs:
    #   month [day][,] [year[/year]]      and      [day] month [year[/year]]
    month_first = r'%s\s+(\d+)?\s*,?\s*((\d+)(/\d+)?)?\s*$'
    day_first = r'(\d+)?\s+?%s\s*((\d+)(/\d+)?)?\s*$'

    self._text = re.compile(month_first % self._mon_str, re.IGNORECASE)
    self._text2 = re.compile(day_first % self._mon_str, re.IGNORECASE)
    self._jtext = re.compile(month_first % self._jmon_str, re.IGNORECASE)
    self._jtext2 = re.compile(day_first % self._jmon_str, re.IGNORECASE)
    self._ftext = re.compile(month_first % self._fmon_str, re.IGNORECASE)
    self._ftext2 = re.compile(day_first % self._fmon_str, re.IGNORECASE)
    self._ptext = re.compile(month_first % self._pmon_str, re.IGNORECASE)
    self._ptext2 = re.compile(day_first % self._pmon_str, re.IGNORECASE)
    self._itext = re.compile(month_first % self._imon_str, re.IGNORECASE)
    self._itext2 = re.compile(day_first % self._imon_str, re.IGNORECASE)

    self._numeric = re.compile(r"((\d+)[/\.]\s*)?((\d+)[/\.]\s*)?(\d+)\s*$")
    self._iso = re.compile(r"(\d+)(/(\d+))?-(\d+)-(\d+)\s*$")
    self._rfc = re.compile(
        r"(%s,)?\s+(\d|\d\d)\s+%s\s+(\d+)\s+\d\d:\d\d(:\d\d)?\s+(\+|-)\d\d\d\d"
        % (self._rfc_day_str, self._rfc_mon_str))
288
def _get_int(self, val):
    """
    Convert the string to an integer if the value is not None. If the
    value is None, zero is returned.

    int() raises ValueError for a string that is not a number.
    """
    # Identity test: only the None object itself stands for "no value".
    if val is None:
        return 0
    return int(val)
298
def _parse_hebrew(self, text):
    """
    Parse the date portion of text written with Hebrew month names.

    Delegates to _parse_calendar with the two Hebrew textual patterns and
    the Hebrew month table, and returns its result unchanged.
    """
    return self._parse_calendar(text, self._jtext, self._jtext2,
                                self.hebrew_to_int)
302
def _parse_islamic(self, text):
    """
    Parse the date portion of text written with Islamic month names.

    Delegates to _parse_calendar with the two Islamic textual patterns and
    the Islamic month table, and returns its result unchanged.
    """
    return self._parse_calendar(text, self._itext, self._itext2,
                                self.islamic_to_int)
306
def _parse_persian(self, text):
    """
    Parse the date portion of text written with Persian month names.

    Delegates to _parse_calendar with the two Persian textual patterns and
    the Persian month table, and returns its result unchanged.
    """
    return self._parse_calendar(text, self._ptext, self._ptext2,
                                self.persian_to_int)
310
def _parse_french(self, text):
    """
    Parse the date portion of text written with French Republican month
    names.

    Delegates to _parse_calendar with the two French textual patterns and
    the French month table, and returns its result unchanged.
    """
    return self._parse_calendar(text, self._ftext, self._ftext2,
                                self.french_to_int)
314
318