00001 package MMAgic::Log::Parser;
00002 #
00003 # MMAgic/Log/Parser.pm
00004 #
00005 # Parse log files.
00006 #
00007
00008 use Date::Manip;
00009 use File::Basename;
00010 use Time::Local;
00011 use UNIVERSAL qw(isa);
00012
00013 use MMAgic::Log;
00014 use MMAgic::Tie::Hash::Fuzzy;
00015 use MMAgic::Trace;
00016
00017 # Would include these from Fcntl but stuck w/Perl 5.003 on Linux 6.1:
00018 sub SEEK_SET { 0 }
00019 sub SEEK_CUR { 1 }
00020 sub SEEK_END { 2 }
00021
00022 $nameLen = 8;
00023 $granule = 1024;
00024 $idlesec = 7200; # two hours
00025
00026 tie %month, 'MMAgic::Tie::Hash::Fuzzy';
00027 map {
00028 $month{qr/^$_/i} = int($month);
00029 $month++;
00030 } qw(jan feb mar apr may jun jul aug sep oct nov dec);
00031
00032 ###########################################################################
00033 ###########################################################################
00034 sub new # $class, %init
00035 #
00036 {
00037 my $clss = shift;
00038 #!# my $auto = Log->method('%', @_)->block;
00039 my %init = @_;
00040
00041 Log->error('File ', $init{path}, ' doesn\'t exist.'),
00042 return undef
00043 unless -f $init{path};
00044
00045 Log->error('File ', $init{path}, ' isn\'t readable.'),
00046 return undef
00047 unless -r $init{path};
00048
00049 my $hndl = new IO::File($init{path});
00050
00051 Log->error('Unable to open ', $init{path}, ' for reading'),
00052 return undef
00053 unless isa($hndl, 'IO::File');
00054
00055 my ($name, $path, $suff) = fileparse($init{path}, '.[^\.]*');
00056
00057 $name = $init{name} if $init{name};
00058
00059 if ($suff =~ /^\.\d*$/)
00060 {
00061 # Suffix is all digits, map over filename:
00062 $name .= ' ' while length($name) < $nameLen;
00063 $suff =~ s/^\.0*(\d)$/$1/;
00064 substr($name, $nameLen - length($suff), length($suff)) = $suff;
00065 }
00066
00067 if ($init{type})
00068 {
00069 $clss = __PACKAGE__ . "::$init{type}";
00070 eval "require $clss";
00071
00072 Log->error('Unable to load class ', $clss, "\n $@"),
00073 return undef
00074 if $@;
00075 }
00076
00077 my $self = bless
00078 {
00079 catchUp => $init{catchUp},
00080 handle => $hndl,
00081 name => $name,
00082 pathname => $init{path},
00083 parsed => [ ],
00084 patterns => [ ],
00085 queue => [ ],
00086 virgin => 1,
00087 }, $clss;
00088
00089 map {
00090 push @{$self->{patterns}},
00091 isa($_, 'Regexp') ? $_ : qr($_);
00092 } isa($init{'grep'}, 'ARRAY') ? @{$init{'grep'}} :
00093 $init{'grep'} ? ( $init{'grep'} ) : ( );
00094
00095 # if (isa($init{'grep'}, 'ARRAY'))
00096 # {
00097 # my @matches = map
00098 # {
00099 # isa($_, 'Regexp') ? $_ : qr($_);
00100 # } @{$init{'grep'}};
00101 #
00102 # $self->{patterns} = \@matches;
00103 # }
00104 #
00105 # elsif ($init{'grep'})
00106 # {
00107 # $self->{patterns} = isa($init{'grep'}, 'Regexp')
00108 # ? [ $init{'grep'} ]
00109 # : [ qr($init{'grep'}) ];
00110 # }
00111
00112 $self
00113 }
00114
00115 ###########################################################################
00116 ###########################################################################
00117 sub catchUp { $_[0]->{catchUp} }
00118 sub dead { $_[0]->{dead} }
00119 sub handle { $_[0]->{handle} }
00120 sub lastDate { $_[0]->{lastDate} }
00121 sub name { $_[0]->{name} }
00122 sub path { $_[0]->{pathname} }
00123
00124 ###########################################################################
00125 ###########################################################################
00126 sub getparsed # $self, ...
00127 #
00128 {
00129 my $self = shift;
00130 #!# my $auto = Log->method(@_)->block;
00131
00132 # There might be one we just parsed in getmsg():
00133 return shift @{$self->{parsed}}
00134 if @{$self->{parsed}};
00135
00136 # Need a new one, no old ones cached:
00137 while (my $line = $self->{handle}->getline)
00138 {
00139 chomp $line;
00140
00141 next unless $line; # get rid of blank lines in logs
00142
00143 my @data = $self->parse($line, @_);
00144
00145 $self->{lastDate} = $data[0] # track last msg time
00146 if $data[0] > $self->{lastDate};
00147
00148 next unless $data[2]; # get rid of blank messages in logs
00149
00150 return \@data;
00151 }
00152
00153 # Here we are, having hit the end of the file,
00154 # is it one that hasn't seen action in a while?
00155 my (undef, undef, undef, undef, undef, undef, undef, undef, undef,
00156 $mtime) = $self->{handle}->stat;
00157
00158 return
00159 unless $now - $mtime > $idlesec;
00160
00161 # File hasn't seen action in a while,
00162 # giving up means maybe some other file can open:
00163 $self->{dead} = time;
00164 undef $self->{handle};
00165 undef
00166 }
00167
00168 ###########################################################################
00169 sub getmsg # $self, $now [, $unrec ]
00170 # Return a single message from the stream.
00171 # Must parse message and collect related lines into single item.
00172 #
00173 {
00174 my $self = shift;
00175 #!# my $auto = Log->method(@_)->block;
00176 my $now = shift;
00177
00178 return undef if $self->{dead};
00179
00180 # Look for a message previously parsed that is good timewise:
00181 #!# Log->trace('Returning queued message...'),
00182 return $self->{queue}->[0]->[0] <= $now
00183 ? shift(@{$self->{queue}}) # found one ready, pull from queue
00184 : $self->{queue}->[0]->[0] # not yet, so return its time
00185 if @{$self->{queue}};
00186
00187 # Nothing queued, parse a new one from the file:
00188 my $msg = undef;
00189
00190 while (my $data = $self->getparsed(@_))
00191 {
00192 #!# Log->trace('Msg: ', join(', ', @$data));
00193
00194 while (my $dnxt = $self->getparsed(@_))
00195 {
00196 #!# Log->trace('Ext: ', join(', ', @$dnxt));
00197 # Get any extension messages ('+' flag)
00198 if ($dnxt->[1] eq '+')
00199 {
00200 # Append next line onto this one:
00201 $data->[2] .= "\n";
00202 $data->[2] .= $dnxt->[2];
00203 }
00204
00205 else
00206 {
00207 # Try the parsed line again later:
00208 unshift @{$self->{parsed}}, $dnxt;
00209 last; # done for now
00210 }
00211 }
00212
00213 if ($self->{virgin} && ! $self->{catchUp})
00214 {
00215 # First time through, if we're not in catch-up mode
00216 # for a late addition, want to skip through to
00217 # find beginning of file:
00218 if ($data->[0] < $now)
00219 {
00220 #!# Log->trace('Skipping early one ', $data->[0], '<', $now),
00221 $self->{skipped} = $self->skip($now)
00222 unless $self->{skipped};
00223
00224 next;
00225 }
00226
00227 # We've got a good message in our time slot,
00228 # don't come through this code any more:
00229 undef $self->{virgin};
00230 }
00231
00232 #!# Log->trace('Skipping uninteresting one'),
00233 next
00234 unless $self->matches($data->[2]);
00235
00236 undef $self->{catchUp}
00237 if $self->{catchUp}
00238 && $data->[0] >= $now;
00239
00240 #!# Log->trace('Queue late one: ', scalar(localtime($data->[0]))),
00241 push(@{$self->{queue}}, $data),
00242 return $data->[0]
00243 if $data->[0] > $now;
00244
00245 #!# Log->trace('returning good one');
00246 return $data;
00247 }
00248 }
00249
00250 ###########################################################################
00251 sub makeTime # $self, $date, $sec, $min, $hour, $mday, $mon, $year
00252 #
00253 {
00254 my $self = shift;
00255 #!# my $auto = Log->method(@_)->block;
00256 my $date = shift;
00257
00258 if (@_ > 5)
00259 {
00260 # Convert month name to number if necessary:
00261 my ($sec, $min, $hour, $day, $mon, $year) = @_;
00262 my $res = undef;
00263
00264 if (int($mon)) { $mon--; }
00265 else { $mon = $month{$mon}; }
00266
00267 $year -= 1900 if $year > 1900;
00268
00269 eval
00270 {
00271 $res = timelocal($sec, $min, $hour, $day, $mon, $year);
00272 };
00273
00274 Log->warning("Unable to make time $date properly\n $@"),
00275 undef $res
00276 if $@;
00277
00278 return $res if int($res)
00279 }
00280
00281 Log->warning('Must parse ', $self->name,
00282 "\n date ", $date, ' ', join(', ', @_),
00283 "\n with Date::Manip");
00284 $date = ParseDateString($date);
00285 $date && UnixDate($date, '%s')
00286 }
00287
00288 ###########################################################################
00289 sub matches # $self, $msg
00290 #
00291 {
00292 my $self = shift;
00293 #!# my $auto = Log->method(@_)->block;
00294 my $msg = shift;
00295
00296 #!# Log->trace('Nothing specified'),
00297 return 1 # always matches when nothing specified
00298 unless @{$self->{patterns}};
00299
00300 map {
00301 #!# Log->trace('- ', $_);
00302 #!# Log->trace(' matched!'),
00303 return 1
00304 if $msg =~ $_;
00305 } @{$self->{patterns}};
00306
00307 undef
00308 }
00309
00310 ###########################################################################
00311 sub parse # $self, $line [, $unrec ]
00312 # Parse a log file line.
00313 # This version does the format generated by MMAgic::Log.
00314 # Overload to define other parsing strategies.
00315 #
00316 {
00317 my $self = shift;
00318 #!# my $auto = Log->method(@_)->block;
00319 my $line = shift;
00320 my ($date, $flag, $msg) = (undef, '?', $line);
00321
00322 if ($line =~ /^\s*([^\[\]]*) \[([^\[\]])\] (.*)$/)
00323 {
00324 # Timestamp, then flag:
00325 $date = $1;
00326 $flag = $2;
00327 $msg = $3;
00328 }
00329
00330 elsif ($line =~ /^\s*\[([^\[\]])\] ([0-9\/: ]{8,19}) (.*)$/)
00331 {
00332 # Flag, then timestamp:
00333 $date = $2;
00334 $flag = $1;
00335 $msg = $3;
00336 }
00337
00338 elsif (my $unrec = shift())
00339 {
00340 $unrec->warning('Unrecognizable ', $self->name, " line:\n ", $line);
00341 }
00342
00343 if ($date)
00344 {
00345 if ($date =~ /^\s*(\d{1,2}):(\d{1,2}):(\d{1,2})\s*$/)
00346 {
00347 # Time only:
00348 my @now = localtime(time);
00349
00350 ($now[0], $now[1], $now[2]) = ($3, $2, $1);
00351 $date = $self->makeTime($date, @now);
00352 }
00353
00354 elsif ($date =~ m|(\d{2,4})/(\d{1,2})/(\d{1,2}) (\d{1,2}):(\d{1,2}):(\d{1,2})|)
00355 {
00356 # Date and time:
00357 $date = $self->makeTime($date, $6, $5, $4, $3, $2, $1);
00358 }
00359
00360 else
00361 {
00362 # Oh, well, better to go for it slowly than not at all:
00363 $date = $self->makeTime($date);
00364 }
00365 }
00366
00367 # Log->warning('No date from ', $self->name),
00368 $date = $self->lastDate
00369 unless int($date);
00370
00371 ( $date, $flag, $msg )
00372 }
00373
00374 ###########################################################################
00375 sub skip # $self, $now
00376 # Skip ahead through file to find our time slot.
00377 # Attempt to shorten long waits loading Apache error logs.
00378 #
00379 {
00380 my $self = shift;
00381 #!# my $auto = Log->method(@_)->block;
00382 my $now = shift;
00383 my $here = $self->{handle}->tell;
00384
00385 return undef
00386 if $here < 0; # error in tell()
00387
00388 return undef
00389 unless $self->{handle}->seek(0, SEEK_END);
00390
00391 my $end = $self->{handle}->tell;
00392
00393 $self->{handle}->seek($here, SEEK_SET),
00394 return undef
00395 if $end < 0; # error in tell()
00396
00397 my $low = $here;
00398 my $high = $end;
00399
00400 BSearch: # binary search for block where we should start parsing
00401 while ($high - $low > $granule)
00402 {
00403 # There's some potential payoff to skipping ahead:
00404 my $mid = ($low + $high) / 2;
00405 #!# Log->trace('High: ', $high);
00406 #!# Log->trace('Mid: ', $mid);
00407 #!# Log->trace('Low: ', $low);
00408
00409 last unless $self->{handle}->seek($mid, SEEK_SET);
00410
00411 # Trash the first partial line:
00412 my $line = $self->{handle}->getline;
00413
00414 # Memorize this point in the file in case it's good:
00415 last if ($mid = $self->{handle}->tell) < 0;
00416
00417 while ($line = $self->{handle}->getline)
00418 {
00419 my @data = $self->parse($line);
00420
00421 if ($data[1] eq '+')
00422 {
00423 # Left-over from previous message block,
00424 # trash this entire line, memorize new point:
00425 last BSearch if ($mid = $self->{handle}->tell) < 0;
00426 }
00427
00428 elsif ($data[0] < $now)
00429 {
00430 # This one's too far in the past,
00431 # so it's safe to move up this far!:
00432 $low = $mid;
00433 last
00434 }
00435
00436 else
00437 {
00438 # Still too far in the future,
00439 # or dead on but that still means go back:
00440 $high = $mid;
00441 last
00442 }
00443 }
00444 }
00445
00446 # Payoff! Here's where we get to jump ahead
00447 # to the last known good point:
00448 $self->{handle}->seek($low, SEEK_SET);
00449 # (doesn't matter if $low is what used to be $here,
00450 # we've been moving the file pointer and have to
00451 # reset it to $here if there is no $low anyway)
00452
00453 Log->info ('Skipped ', $low - $here, ' bytes in ', $self->{name})
00454 unless $low == $here;
00455
00456 #!# Log->trace('Skipped nowhere') if $low == $here;
00457 1
00458 }
00459
00460 1