| # Copyright (C) all contributors <meta@public-inbox.org> |
| # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> |
| |
| # Various date/time-related functions |
| package PublicInbox::MsgTime; |
| use v5.10.1; # unicode_strings in 5.12 may not work... |
| use strict; |
| use parent qw(Exporter); |
| our @EXPORT_OK = qw(msg_timestamp msg_datestamp); |
| use Time::Local qw(timegm); |
| my @MoY = qw(january february march april may june |
| july august september october november december); |
| my %MoY; |
| @MoY{@MoY} = (0..11); |
| @MoY{map { substr($_, 0, 3) } @MoY} = (0..11); |
| |
| my %OBSOLETE_TZ = ( # RFC2822 4.3 (Obsolete Date and Time) |
| EST => '-0500', EDT => '-0400', |
| CST => '-0600', CDT => '-0500', |
| MST => '-0700', MDT => '-0600', |
| PST => '-0800', PDT => '-0700', |
| UT => '+0000', GMT => '+0000', Z => '+0000', |
| |
| # RFC2822 states: |
| # The 1 character military time zones were defined in a non-standard |
| # way in [RFC822] and are therefore unpredictable in their meaning. |
| ); |
| my $OBSOLETE_TZ = join('|', keys %OBSOLETE_TZ); |
| |
| sub str2date_zone ($) { |
| my ($date) = @_; |
| my ($ts, $zone); |
| |
| # RFC822 is most likely for email, but we can tolerate an extra comma |
| # or punctuation as long as all the data is there. |
| # We'll use '\s' since Unicode spaces won't affect our parsing. |
| # SpamAssassin ignores commas and redundant spaces, too. |
| if ($date =~ /(?:[A-Za-z]+,?\s+)? # day-of-week |
| ([0-9]+),?\s+ # dd |
| ([A-Za-z]+)\s+ # mon |
| ([0-9]{2,4})\s+ # YYYY or YY (or YYY :P) |
| ([0-9]+)[:\.] # HH: |
| ((?:[0-9]{2})|(?:\s?[0-9])) # MM |
| (?:[:\.]((?:[0-9]{2})|(?:\s?[0-9])))? # :SS |
| \s+ # a TZ offset is required: |
| ([\+\-])? # TZ sign |
| [\+\-]* # I've seen extra "-" e.g. "--500" |
| ([0-9]+|$OBSOLETE_TZ)(?:\s|$) # TZ offset |
| /xo) { |
| my ($dd, $m, $yyyy, $hh, $mm, $ss, $sign, $tz) = |
| ($1, $2, $3, $4, $5, $6, $7, $8); |
| # don't accept non-English months |
| defined(my $mon = $MoY{lc($m)}) or return; |
| |
| if (defined(my $off = $OBSOLETE_TZ{$tz})) { |
| $sign = substr($off, 0, 1); |
| $tz = substr($off, 1); |
| } |
| |
| # Y2K problems: 3-digit years, follow RFC2822 |
| if (length($yyyy) <= 3) { |
| $yyyy += 1900; |
| |
| # and 2-digit years from '09 (2009) (0..49) |
| $yyyy += 100 if $yyyy < 1950; |
| } |
| |
| $ts = timegm($ss // 0, $mm, $hh, $dd, $mon, $yyyy); |
| |
| # 4-digit dates in non-spam from 1900s and 1910s exist in |
| # lore archives |
| return if $ts < 0; |
| |
| # Compute the time offset from [+-]HHMM |
| $tz //= 0; |
| my ($tz_hh, $tz_mm); |
| if (length($tz) == 1) { |
| $tz_hh = $tz; |
| $tz_mm = 0; |
| } elsif (length($tz) == 2) { |
| $tz_hh = 0; |
| $tz_mm = $tz; |
| } else { |
| $tz_hh = $tz; |
| $tz_hh =~ s/([0-9]{2})\z//; |
| $tz_mm = $1; |
| } |
| while ($tz_mm >= 60) { |
| $tz_mm -= 60; |
| $tz_hh += 1; |
| } |
| $sign //= '+'; |
| my $off = $sign . ($tz_mm * 60 + ($tz_hh * 60 * 60)); |
| $ts -= $off; |
| $sign = '+' if $off == 0; |
| $zone = sprintf('%s%02d%02d', $sign, $tz_hh, $tz_mm); |
| |
| # Time::Zone and Date::Parse are part of the same distribution, |
| # and we need Time::Zone to deal with tz names like "EDT" |
| } elsif (eval { require Date::Parse }) { |
| $ts = Date::Parse::str2time($date); |
| return undef unless(defined $ts); |
| |
| # off is the time zone offset in seconds from GMT |
| my ($ss,$mm,$hh,$day,$month,$year,$off) = |
| Date::Parse::strptime($date); |
| return unless defined($year); |
| $off //= 0; |
| |
| # Compute the time zone from offset |
| my $sign = ($off < 0) ? '-' : '+'; |
| my $hour = abs(int($off / 3600)); |
| my $min = ($off / 60) % 60; |
| |
| # deal with weird offsets like '-0420' properly |
| $min = 60 - $min if ($min && $off < 0); |
| |
| $zone = sprintf('%s%02d%02d', $sign, $hour, $min); |
| } else { |
| warn "Date::Parse missing for non-RFC822 date: $date\n"; |
| return undef; |
| } |
| |
| # Note: we've already applied the offset to $ts at this point, |
| # but we want to keep "git fsck" happy. |
| # "-1200" is the furthest westermost zone offset, |
| # but git fast-import is liberal so we use "-1400" |
| $zone = '+0000' if $zone >= 1400 || $zone <= -1400; |
| [$ts, $zone]; |
| } |
| |
| sub time_response ($) { |
| my ($ret) = @_; |
| wantarray ? @$ret : $ret->[0]; |
| } |
| |
| sub msg_received_at ($) { |
| my ($eml) = @_; |
| my $ts; |
| for my $r ($eml->header_raw('Received')) { |
| $r =~ /\s*([0-9]+\s+[a-zA-Z]+\s+[0-9]{2,4}\s+ |
| [0-9]+[^0-9][0-9]+(?:[^0-9][0-9]+) |
| \s+(?:[\+\-][0-9]+))/sx or next; |
| $ts = eval { str2date_zone($1) } and return $ts; |
| } |
| undef; |
| } |
| |
| sub msg_date_only ($) { |
| my ($eml) = @_; |
| my $ts; |
| for my $d ($eml->header_raw('Date')) { |
| $ts = eval { str2date_zone($d) } and return $ts; |
| } |
| undef; |
| } |
| |
| # Favors Received header for sorting globally |
| sub msg_timestamp ($;$) { |
| my ($eml, $fallback) = @_; |
| time_response(msg_received_at($eml) // msg_date_only($eml) // |
| [ $fallback // time, '+0000' ]); |
| } |
| |
| # Favors the Date: header for display and sorting within a thread |
| sub msg_datestamp ($;$) { |
| my ($eml, $fallback) = @_; # PublicInbox::Eml |
| time_response(msg_date_only($eml) // msg_received_at($eml) // |
| [ $fallback // time, '+0000' ]); |
| } |
| |
| 1; |