root/CPS3/legacy/CPSCollector/trunk/strptime.py

Revision 1902, 17.6 kB (checked in by dwyart, 6 years ago)

Bad fun call : a param was missing. Found when using the product
in a Windows Zope installation

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1 """Implementation of time.strptime().
2
3 This version was written to work in Jython 2.1 .  If you are running CPython
4 2.2.x or newer, please use the version of Modules/timemodule.c and
5 Lib/_strptime.py as found in CVS for CPython 2.3.x (CPython 2.2.0 requires
6 defining ``True = 1; False = 0``).
7
8 The main changes from the version in CVS for CPython 2.3.x is a caching
9 mechanism which improves performance dramatically.  LocaleTime().timezone is
10 also a true set and thus does not require all the tweaking of using a version
11 of Python lacking iterators.  List comprehensions were also removed from the
12 code.  All date-calculating code was made conditional based on requiring
13 datetime.  If you need the calculations you can find the code in the Python CVS
14 repository in the datetime code or in the dead-tree version of the 'Python
15 Cookbook' under the recipe for strptime() (last recipe in the book).
16 Subclassing of object and dict were also removed.
17
18 $Last Edit: 2003-09-15 $
19
20 """
21 import time
22 try:
23     import locale
24 except ImportError:
25     class FakeLocale:
26
27         """Faked locale module (for Jython compatibility)."""
28
29         LC_TIME = None
30        
31         def getlocale(self, whatever):
32             return (whatever, whatever)
33
34     locale = FakeLocale()
35    
36 import calendar
37 from re import compile as re_compile
38 from re import IGNORECASE
39 # Get datetime from Python's CVS: /python/nondist/sandbox/datetime/
40 try:
41     from datetime import date as datetime_date
42 except ImportError:
43     datetime_date = None
44 from thread import allocate_lock as _thread_allocate_lock
45
46 __author__ = "Brett Cannon"
47 __email__ = "brett@python.org"
48
49 __all__ = ['strptime']
50
51 # ----- START Code to replace Python 2.3 functionality -----
# Integer stand-ins for True/False, part of the code that replaces
# Python 2.3 functionality on older interpreters.
True, False = 1, 0
53
def sets_ImmutableSet(iterable):
    """Return a dict that has every item of iterable as a key.

    All values are None; only the keys matter, so the dict can be used
    for membership tests in place of a real set type.

    """
    members = {}
    for element in iterable:
        # setdefault() with no default stores None under a new key.
        members.setdefault(element)
    return members
59
def enumerate(iterable):
    """Return a list of (index, item) tuples for iterable.

    Python 2.1-compatible replacement for the enumerate built-in; the
    whole result is built eagerly as a list.

    """
    pairs = []
    for element in iterable:
        # len(pairs) is the zero-based position of the element being added.
        pairs.append((len(pairs), element))
    return pairs
70
71 # ----- END Code to replace Python 2.3 functionality -----
72
73 def _getlang():
74     # Figure out what the current language is set to.
75     return locale.getlocale(locale.LC_TIME)
76
class LocaleTime:
    """Stores and handles locale-specific information related to time.

    All names are stored lowercased.

    ATTRIBUTES:
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; dummy value in [0], as
                    supplied by the calendar module)
        a_month -- abbreviated month names (13-item list, dummy value in
                    [0], as supplied by the calendar module)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- non-daylight- and daylight-savings timezone names
                    (2-item tuple of dicts used as sets; the names are the
                    keys)
        lang -- Language used by instance (2-item tuple)
    """

    def __init__(self):
        """Set all attributes.

        Order of methods called matters for dependency reasons.

        The locale language is set at the onset and then checked again before
        exiting.  This is to make sure that the attributes were not set with a
        mix of information from more than one locale.  This would most likely
        happen when using threads where one thread calls a locale-dependent
        function while another thread changes the locale while the function in
        the other thread is still running.  Proper coding would call for
        locks to prevent changing the locale while locale-dependent code is
        running.  The check here is done in case someone does not think about
        doing this.

        Only other possible issue is if someone changed the timezone and did
        not call time.tzset .  That is an issue for the programmer, though,
        since changing the timezone is worthless without that call.

        Raises ValueError if the locale reported by _getlang() differs
        between the start and the end of initialization.

        """
        self.lang = _getlang()
        self.__calc_weekday()
        self.__calc_month()
        self.__calc_am_pm()
        # __calc_date_time() reads self.timezone, so the timezone must be
        # calculated first.
        self.__calc_timezone()
        self.__calc_date_time()
        if _getlang() != self.lang:
            raise ValueError("locale changed during initialization")

    def __pad(self, seq, front):
        # Return a list copy of seq with '' added to the front (front is
        # true), else to the back.
        seq = list(seq)
        if front:
            seq.insert(0, '')
        else:
            seq.append('')
        return seq

    def __calc_weekday(self):
        # Set self.a_weekday and self.f_weekday using the calendar
        # module.
        a_weekday = [calendar.day_abbr[i].lower() for i in range(7)]
        f_weekday = [calendar.day_name[i].lower() for i in range(7)]
        self.a_weekday = a_weekday
        self.f_weekday = f_weekday

    def __calc_month(self):
        # Set self.f_month and self.a_month using the calendar module.
        # Index 0 is the calendar module's own dummy entry.
        a_month = [calendar.month_abbr[i].lower() for i in range(13)]
        f_month = [calendar.month_name[i].lower() for i in range(13)]
        self.a_month = a_month
        self.f_month = f_month

    def __calc_am_pm(self):
        # Set self.am_pm by using time.strftime().

        # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
        # magical; just happened to have used it everywhere else where a
        # static date was needed.
        am_pm = []
        # Plain decimal 1 (not the old-style octal literal 01) so the module
        # still parses on interpreters that reject that spelling.
        for hour in (1, 22):
            time_tuple = (1999,3,17,hour,44,55,2,76,0)
            am_pm.append(time.strftime("%p", time_tuple).lower())
        self.am_pm = am_pm

    def __calc_date_time(self):
        # Set self.LC_date_time, self.LC_date, & self.LC_time by using
        # time.strftime().

        # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
        # overloaded numbers is minimized.  The order in which searches for
        # values within the format string is very important; it eliminates
        # possible ambiguity for what something represents.
        time_tuple = (1999,3,17,22,44,55,2,76,0)
        date_time = [None, None, None]
        date_time[0] = time.strftime("%c", time_tuple).lower()
        date_time[1] = time.strftime("%x", time_tuple).lower()
        date_time[2] = time.strftime("%X", time_tuple).lower()
        replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'),
                    (self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
                    (self.a_month[3], '%b'), (self.am_pm[1], '%p'),
                    ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
                    ('44', '%M'), ('55', '%S'), ('76', '%j'),
                    ('17', '%d'), ('03', '%m'), ('3', '%m'),
                    # '3' needed for when no leading zero.
                    ('2', '%w'), ('10', '%I')]
        for tz_values in self.timezone:
            for tz in tz_values.keys():
                replacement_pairs.append((tz, "%Z"))
        # Sunday 1999-01-03 falls before the first Monday of 1999, so a
        # format using %W renders its week number as "00" while one using %U
        # renders "01".  The tuple never changes, so build it once.
        week_probe = (1999,1,3,1,1,1,6,3,0)
        for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')):
            current_format = date_time[offset]
            for old, new in replacement_pairs:
                # Must deal with possible lack of locale info
                # manifesting itself as the empty string (e.g., Swedish's
                # lack of AM/PM info) or a platform returning a tuple of empty
                # strings (e.g., MacOS 9 having timezone as ('','')).
                if old:
                    current_format = current_format.replace(old, new)
            # find() returns -1 (which is true) when '00' is absent and 0
            # (which is false) when it is found at the very start, so the
            # result has to be compared against -1 rather than tested for
            # truth.
            if time.strftime(directive, week_probe).find('00') != -1:
                U_W = '%W'
            else:
                U_W = '%U'
            # '11' is the week number of the magic date under both %U and %W.
            date_time[offset] = current_format.replace('11', U_W)
        self.LC_date_time = date_time[0]
        self.LC_date = date_time[1]
        self.LC_time = date_time[2]

    def __calc_timezone(self):
        # Set self.timezone by using time.tzname.
        # Do not worry about possibility of time.tzname[0] == time.tzname[1]
        # and time.daylight; handle that in strptime .
        try:
            time.tzset()
        except AttributeError:
            # time.tzset() is not available on every platform.
            pass
        no_saving = sets_ImmutableSet(["utc", "gmt", time.tzname[0].lower()])
        if time.daylight:
            has_saving = sets_ImmutableSet([time.tzname[1].lower()])
        else:
            has_saving = sets_ImmutableSet([])
        self.timezone = (no_saving, has_saving)
218
219
from re import escape as re_escape
try:
    import UserDict
    _TimeREBase = UserDict.UserDict
except ImportError:
    # Interpreters without the old UserDict module provide the same class
    # in collections.
    from collections import UserDict as _TimeREBase


class TimeRE(_TimeREBase):
    """Handle conversion from format directives to regexes.

    The mapping is keyed by directive letter (without the '%'); each value
    is the regex source string that matches that directive.

    """

    def __init__(self, locale_time=None):
        """Create keys/values.

        Order of execution is important for dependency reasons.

        locale_time supplies the locale-specific names and formats; when it
        is not given a fresh LocaleTime() is created.

        """
        if locale_time:
            self.locale_time = locale_time
        else:
            self.locale_time = LocaleTime()
        base = _TimeREBase
        base.__init__(self, {
            # The " \d" part of the regex is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            # W is set below by using 'U'
            'y': r"(?P<y>\d\d)",
            #XXX: Does 'Y' need to worry about having less or more than
            #     4 digits?
            'Y': r"(?P<Y>\d\d\d\d)",
            'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
            'a': self.__seqToRE(self.locale_time.a_weekday, 'a'),
            # Skip the dummy entry at index 0 of the month lists.
            'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
            'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'),
            'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
            '%': '%'})
        temp_list = []
        for tz_names in self.locale_time.timezone:
            for tz in tz_names.keys():
                temp_list.append(tz)
        base.__setitem__(self, 'Z', self.__seqToRE(temp_list, 'Z'))
        base.__setitem__(self, 'W', base.__getitem__(self, 'U'))
        # The locale formats are built from the directives defined above, so
        # they have to be added last.
        base.__setitem__(self, 'c', self.pattern(self.locale_time.LC_date_time))
        base.__setitem__(self, 'x', self.pattern(self.locale_time.LC_date))
        base.__setitem__(self, 'X', self.pattern(self.locale_time.LC_time))

    def __seqToRE(self, to_convert, directive):
        """Convert a list to a regex string for matching a directive.

        Want possible matching values to be from longest to shortest.  This
        prevents the possibility of a match occurring for a value that is
        also a substring of a larger value that should have matched (e.g.,
        'abc' matching when 'abcdef' should have been the match).

        Returns '' when every value in to_convert is the empty string (or
        to_convert is empty); otherwise a named group
        '(?P<directive>alt1|alt2|...)'.

        """
        for value in to_convert:
            if value != '':
                break
        else:
            return ''
        to_sort = [(len(item), item) for item in to_convert]
        to_sort.sort()
        to_sort.reverse()
        # Escape every value: locale strings such as 'a.m.' or timezone names
        # containing punctuation must match literally, not as regex syntax.
        to_convert = [re_escape(item) for length, item in to_sort]
        regex = '|'.join(to_convert)
        regex = '(?P<%s>%s' % (directive, regex)
        return '%s)' % regex

    def pattern(self, format):
        """Return regex pattern for the format string.

        Need to make sure that any characters that might be interpreted as
        regex syntax are escaped.  Any run of whitespace in the format
        matches zero or more whitespace characters in the data.

        Raises KeyError for an unknown directive and IndexError for a '%'
        that is the last character of the format.

        """
        processed_format = ''
        # The sub() call escapes all characters that might be misconstrued
        # as regex syntax.  Parentheses are included so that a literal '('
        # or ')' in the format does not open or close a regex group.
        regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
        format = regex_chars.sub(r"\\\1", format)
        whitespace_replacement = re_compile(r'\s+')
        # The backslash is doubled in the replacement template so that the
        # text inserted is exactly \s* ; a bare '\s' is not a valid template
        # escape.
        format = whitespace_replacement.sub(r'\\s*', format)
        while format.find('%') != -1:
            directive_index = format.index('%')+1
            processed_format = "%s%s%s" % (processed_format,
                                           format[:directive_index-1],
                                           self[format[directive_index]])
            format = format[directive_index+1:]
        return "%s%s" % (processed_format, format)

    def compile(self, format):
        """Return a compiled, case-insensitive re object for the format string."""
        return re_compile(self.pattern(format), IGNORECASE)
314
# Lock that strptime() holds while it reads or updates the two caches below.
_cache_lock = _thread_allocate_lock()
# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
# first!
# TimeRE instance shared by all strptime() calls; strptime() replaces it when
# the locale language no longer matches.
_TimeRE_cache = TimeRE()
_CACHE_MAX_SIZE = 5 # strptime() empties _regex_cache once it holds more entries than this
# Maps a format string to its compiled regex.
_regex_cache = {}
321
def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a time struct based on the input string and the format string.

    data_string -- the text to parse
    format -- strftime()-style format that the whole of data_string must match

    Returns a 9-item tuple (year, month, day, hour, minute, second, weekday,
    julian, tz).  Fields missing from the format default to 1900-01-01
    00:00:00 and tz -1.  weekday and julian stay -1 when they are neither
    given in the data nor computable (the datetime module is unavailable).

    Raises ValueError if data_string does not match format or if text is
    left over after the match.

    """
    global _TimeRE_cache
    _cache_lock.acquire()
    try:
        if _getlang() != _TimeRE_cache.locale_time.lang:
            # The locale changed since the cache was built.  Rebuild it and
            # drop every compiled regex, since those embed the old locale's
            # names.
            _TimeRE_cache = TimeRE()
            _regex_cache.clear()
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        # Bind these only after the possible rebuild above so that this call
        # parses with the current locale's data rather than the stale one.
        time_re = _TimeRE_cache
        locale_time = time_re.locale_time
        format_regex = _regex_cache.get(format)
        if not format_regex:
            format_regex = time_re.compile(format)
            _regex_cache[format] = format_regex
    finally:
        _cache_lock.release()
    found = format_regex.match(data_string)
    if not found:
        raise ValueError("time data did not match format:  data=%s  fmt=%s" %
                         (data_string, format))
    if len(data_string) != found.end():
        raise ValueError("unconverted data remains: %s" %
                          data_string[found.end():])
    year = 1900
    month = day = 1
    hour = minute = second = 0
    tz = -1
    # weekday and julian defaulted to -1 so as to signal need to calculate values
    weekday = julian = -1
    found_dict = found.groupdict()
    for group_key in found_dict.keys():
        if group_key == 'y':
            year = int(found_dict['y'])
            # Open Group specification for strptime() states that a %y
            #value in the range of [00, 68] is in the century 2000, while
            #[69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            # The month lists carry a dummy entry at index 0, so the list
            # index is already the 1-based month number.
            month = locale_time.f_month.index(found_dict['B'].lower())
        elif group_key == 'b':
            month = locale_time.a_month.index(found_dict['b'].lower())
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0]):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1]:
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = locale_time.f_weekday.index(found_dict['A'].lower())
        elif group_key == 'a':
            weekday = locale_time.a_weekday.index(found_dict['a'].lower())
        elif group_key == 'w':
            # %w counts from Sunday == 0; the result counts from Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key == 'Z':
            # Since -1 is default value only need to worry about setting tz if
            # it can be something other than -1.
            found_zone = found_dict['Z'].lower()
            for value, tz_values in enumerate(locale_time.timezone):
                if found_zone in tz_values:
                    # Deal with bad locale setup where timezone names are the
                    # same and yet time.daylight is true; too ambiguous to
                    # be able to tell what timezone has daylight savings
                    if time.tzname[0] == time.tzname[1] and \
                       time.daylight:
                            break
                    else:
                        tz = value
                        break
    # Cannot pre-calculate datetime_date() since can change in Julian
    #calculation and thus could have different value for the day of the week
    #calculation
    if datetime_date:
        if julian == -1:
            # Need to add 1 to result since first day of the year is 1, not 0.
            julian = datetime_date(year, month, day).toordinal() - \
                      datetime_date(year, 1, 1).toordinal() + 1
        else:  # Assume that if they bothered to include Julian day it will
               #be accurate
            datetime_result = datetime_date.fromordinal((julian - 1) + datetime_date(year, 1, 1).toordinal())
            year = datetime_result.year
            month = datetime_result.month
            day = datetime_result.day
        if weekday == -1:
            weekday = datetime_date(year, month, day).weekday()
    return (year, month, day, hour, minute, second, weekday, julian, tz)
Note: See TracBrowser for help on using the browser.