Icon for Dimension.orgIcon for TalentIcon for ToolsIcon for Doorways.org
Icon for ToolsIcon for Perl Tools

MMAgic Demo: MMAgic/Log/Parser.pm Source File

  • Main Page
  • Related Pages
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

MMAgic/Log/Parser.pm

Go to the documentation of this file.
00001 package MMAgic::Log::Parser;
00002 #
00003 # MMAgic/Log/Parser.pm
00004 #
00005 #   Parse log files.
00006 #
00007 
00008 use     Date::Manip;
00009 use     File::Basename;
00010 use     Time::Local;
00011 use     UNIVERSAL   qw(isa);
00012 
00013 use     MMAgic::Log;
00014 use     MMAgic::Tie::Hash::Fuzzy;
00015 use     MMAgic::Trace;
00016 
00017 # Would include these from Fcntl but stuck w/Perl 5.003 on Linux 6.1:
00018 sub SEEK_SET    { 0 }
00019 sub SEEK_CUR    { 1 }
00020 sub SEEK_END    { 2 }
00021 
00022 $nameLen =    8;
00023 $granule = 1024;
00024 $idlesec = 7200;    # two hours
00025 
00026 tie %month, 'MMAgic::Tie::Hash::Fuzzy';
00027 map {
00028     $month{qr/^$_/i} = int($month);
00029     $month++;
00030     } qw(jan feb mar apr may jun jul aug sep oct nov dec);
00031 
00032 ###########################################################################
00033 ###########################################################################
00034 sub new     # $class, %init
00035 #
00036     {
00037     my  $clss = shift;
00038 #!# my  $auto = Log->method('%', @_)->block;
00039     my  %init = @_;
00040     
00041     Log->error('File ', $init{path}, ' doesn\'t exist.'),
00042     return undef
00043         unless -f $init{path};
00044     
00045     Log->error('File ', $init{path}, ' isn\'t readable.'),
00046     return undef
00047         unless -r $init{path};
00048     
00049     my  $hndl = new IO::File($init{path});
00050     
00051     Log->error('Unable to open ', $init{path}, ' for reading'),
00052     return undef
00053         unless isa($hndl, 'IO::File');
00054     
00055     my ($name, $path, $suff) = fileparse($init{path}, '.[^\.]*');
00056     
00057     $name = $init{name} if $init{name};
00058     
00059     if ($suff =~ /^\.\d*$/)
00060         {
00061         # Suffix is all digits, map over filename:
00062         $name .=  ' ' while length($name) < $nameLen;
00063         $suff  =~ s/^\.0*(\d)$/$1/;
00064         substr($name, $nameLen - length($suff), length($suff)) = $suff;
00065         }
00066     
00067     if ($init{type})
00068         {
00069         $clss = __PACKAGE__ . "::$init{type}";
00070         eval "require $clss";
00071 
00072         Log->error('Unable to load class ', $clss, "\n  $@"),
00073         return undef
00074             if $@;
00075         }
00076     
00077     my  $self = bless
00078         {
00079         catchUp  => $init{catchUp},
00080         handle   => $hndl,
00081         name     => $name,
00082         pathname => $init{path},
00083         parsed   => [ ],
00084         patterns => [ ],
00085         queue    => [ ],
00086         virgin   => 1,
00087         }, $clss;
00088     
00089     map {
00090         push @{$self->{patterns}},
00091              isa($_, 'Regexp') ? $_ : qr($_);
00092         } isa($init{'grep'}, 'ARRAY') ? @{$init{'grep'}}  :
00093           $init{'grep'}               ? ( $init{'grep'} ) : ( );
00094     
00095 #   if (isa($init{'grep'}, 'ARRAY'))
00096 #       {
00097 #       my  @matches = map
00098 #           {
00099 #           isa($_, 'Regexp') ? $_ : qr($_);
00100 #           } @{$init{'grep'}};
00101 #       
00102 #       $self->{patterns} = \@matches;
00103 #       }
00104 #   
00105 #   elsif ($init{'grep'})
00106 #       {
00107 #       $self->{patterns} = isa($init{'grep'}, 'Regexp')
00108 #                         ? [ $init{'grep'} ]
00109 #                         : [ qr($init{'grep'}) ];
00110 #       }
00111     
00112     $self
00113     }
00114 
00115 ###########################################################################
00116 ###########################################################################
00117 sub catchUp     {   $_[0]->{catchUp}    }
00118 sub dead        {   $_[0]->{dead}       }
00119 sub handle      {   $_[0]->{handle}     }
00120 sub lastDate    {   $_[0]->{lastDate}   }
00121 sub name        {   $_[0]->{name}       }
00122 sub path        {   $_[0]->{pathname}   }
00123 
00124 ###########################################################################
00125 ###########################################################################
00126 sub getparsed   # $self, ...
00127 #
00128     {
00129     my  $self = shift;
00130 #!# my  $auto = Log->method(@_)->block;
00131 
00132     # There might be one we just parsed in getmsg():
00133     return shift @{$self->{parsed}}
00134         if @{$self->{parsed}};
00135     
00136     # Need a new one, no old ones cached:
00137     while (my $line = $self->{handle}->getline)
00138         {
00139         chomp $line;
00140 
00141         next unless $line;      # get rid of blank lines in logs
00142         
00143         my  @data = $self->parse($line, @_);
00144         
00145         $self->{lastDate} = $data[0]    # track last msg time
00146             if $data[0] > $self->{lastDate};
00147         
00148         next unless $data[2];   # get rid of blank messages in logs
00149         
00150         return \@data;
00151         }
00152     
00153     # Here we are, having hit the end of the file,
00154     #   is it one that hasn't seen action in a while?
00155     my (undef, undef, undef, undef, undef, undef, undef, undef, undef,
00156         $mtime) = $self->{handle}->stat;
00157     
00158     return
00159         unless $now - $mtime > $idlesec;
00160 
00161     # File hasn't seen action in a while,
00162     #   giving up means maybe some other file can open:
00163     $self->{dead} = time;
00164     undef $self->{handle};
00165     undef
00166     }
00167 
00168 ###########################################################################
00169 sub getmsg  # $self, $now [, $unrec ]
00170 # Return a single message from the stream.
00171 #   Must parse message and collect related lines into single item.
00172 #
00173     {
00174     my  $self = shift;
00175 #!# my  $auto = Log->method(@_)->block;
00176     my  $now  = shift;
00177 
00178     return undef if $self->{dead};
00179     
00180     # Look for a message previously parsed that is good timewise:
00181 #!# Log->trace('Returning queued message...'),
00182     return $self->{queue}->[0]->[0] <= $now
00183          ? shift(@{$self->{queue}}) # found one ready, pull from queue
00184          : $self->{queue}->[0]->[0] # not yet, so return its time
00185         if @{$self->{queue}};
00186     
00187     # Nothing queued, parse a new one from the file:
00188     my  $msg = undef;
00189     
00190     while (my $data = $self->getparsed(@_))
00191         {
00192 #!#     Log->trace('Msg:  ', join(', ', @$data));
00193         
00194         while (my $dnxt = $self->getparsed(@_))
00195             {
00196 #!#         Log->trace('Ext:  ', join(', ', @$dnxt));
00197             # Get any extension messages ('+' flag)
00198             if ($dnxt->[1] eq '+')
00199                 {
00200                 # Append next line onto this one:
00201                 $data->[2] .= "\n";
00202                 $data->[2] .= $dnxt->[2];
00203                 }
00204             
00205             else
00206                 {
00207                 # Try the parsed line again later:
00208                 unshift @{$self->{parsed}}, $dnxt;
00209                 last;   # done for now
00210                 }
00211             }
00212 
00213         if ($self->{virgin} && ! $self->{catchUp})
00214             {
00215             # First time through, if we're not in catch-up mode
00216             #   for a late addition, want to skip through to
00217             #   find beginning of file:
00218             if ($data->[0] < $now)
00219                 {
00220 #!#             Log->trace('Skipping early one ', $data->[0], '<', $now),
00221                 $self->{skipped} = $self->skip($now)
00222                     unless $self->{skipped};
00223                 
00224                 next;
00225                 }
00226             
00227             # We've got a good message in our time slot,
00228             #   don't come through this code any more:
00229             undef $self->{virgin};
00230             }
00231         
00232 #!#     Log->trace('Skipping uninteresting one'),
00233         next
00234             unless $self->matches($data->[2]);
00235         
00236         undef $self->{catchUp}
00237             if $self->{catchUp}
00238             && $data->[0] >= $now;
00239         
00240 #!#     Log->trace('Queue late one:  ', scalar(localtime($data->[0]))),
00241         push(@{$self->{queue}}, $data),
00242         return $data->[0]
00243             if $data->[0] > $now;
00244         
00245 #!#     Log->trace('returning good one');
00246         return $data;
00247         }
00248     }
00249 
00250 ###########################################################################
00251 sub makeTime    # $self, $date, $sec, $min, $hour, $mday, $mon, $year
00252 #
00253     {
00254     my  $self = shift;
00255 #!# my  $auto = Log->method(@_)->block;
00256     my  $date = shift;
00257     
00258     if (@_ > 5)
00259         {
00260         # Convert month name to number if necessary:
00261         my ($sec, $min, $hour, $day, $mon, $year) = @_;
00262         my  $res = undef;
00263         
00264         if (int($mon))  {   $mon--;                 }
00265         else            {   $mon = $month{$mon};    }
00266         
00267         $year -= 1900 if $year > 1900;
00268         
00269         eval
00270             {
00271             $res = timelocal($sec, $min, $hour, $day, $mon, $year);
00272             };
00273         
00274         Log->warning("Unable to make time $date properly\n  $@"),
00275         undef $res
00276             if $@;
00277         
00278         return $res if int($res)
00279         }
00280     
00281     Log->warning('Must parse ', $self->name,
00282                "\n  date ", $date, ' ', join(', ', @_),
00283                "\n  with Date::Manip");
00284     $date = ParseDateString($date);
00285     $date && UnixDate($date, '%s')
00286     }
00287 
00288 ###########################################################################
00289 sub matches # $self, $msg
00290 #
00291     {
00292     my  $self = shift;
00293 #!# my  $auto = Log->method(@_)->block;
00294     my  $msg  = shift;
00295     
00296 #!# Log->trace('Nothing specified'),
00297     return 1    # always matches when nothing specified
00298         unless @{$self->{patterns}};
00299     
00300     map {
00301 #!#     Log->trace('- ', $_);
00302 #!#     Log->trace('  matched!'),
00303         return 1
00304             if $msg =~ $_;
00305         } @{$self->{patterns}};
00306     
00307     undef
00308     }
00309 
00310 ###########################################################################
00311 sub parse   # $self, $line [, $unrec ]
00312 # Parse a log file line.
00313 #   This version does the format generated by MMAgic::Log.
00314 #   Overload to define other parsing strategies.
00315 #
00316     {
00317     my  $self = shift;
00318 #!# my  $auto = Log->method(@_)->block;
00319     my  $line = shift;
00320     my ($date, $flag, $msg) = (undef, '?', $line);
00321     
00322     if ($line =~ /^\s*([^\[\]]*) \[([^\[\]])\] (.*)$/)
00323         {
00324         # Timestamp, then flag:
00325         $date = $1;
00326         $flag = $2;
00327         $msg  = $3;
00328         }
00329     
00330     elsif ($line =~ /^\s*\[([^\[\]])\] ([0-9\/: ]{8,19}) (.*)$/)
00331         {
00332         # Flag, then timestamp:
00333         $date = $2;
00334         $flag = $1;
00335         $msg  = $3;
00336         }
00337     
00338     elsif (my $unrec = shift())
00339         {
00340         $unrec->warning('Unrecognizable ', $self->name, " line:\n  ", $line);
00341         }
00342     
00343     if ($date)
00344         {
00345         if ($date =~ /^\s*(\d{1,2}):(\d{1,2}):(\d{1,2})\s*$/)
00346             {
00347             # Time only:
00348             my  @now = localtime(time);
00349             
00350             ($now[0], $now[1], $now[2]) = ($3, $2, $1);
00351             $date = $self->makeTime($date, @now);
00352             }
00353         
00354         elsif ($date =~ m|(\d{2,4})/(\d{1,2})/(\d{1,2}) (\d{1,2}):(\d{1,2}):(\d{1,2})|)
00355             {
00356             # Date and time:
00357             $date = $self->makeTime($date, $6, $5, $4, $3, $2, $1);
00358             }
00359         
00360         else
00361             {
00362             # Oh, well, better to go for it slowly than not at all:
00363             $date = $self->makeTime($date);
00364             }
00365         }
00366     
00367 #   Log->warning('No date from ', $self->name),
00368     $date = $self->lastDate
00369         unless int($date);
00370     
00371     ( $date, $flag, $msg )
00372     }
00373 
00374 ###########################################################################
00375 sub skip    # $self, $now
00376 # Skip ahead through file to find our time slot.
00377 #   Attempt to shorten long waits loading Apache error logs.
00378 #
00379     {
00380     my  $self = shift;
00381 #!# my  $auto = Log->method(@_)->block;
00382     my  $now  = shift;
00383     my  $here = $self->{handle}->tell;
00384     
00385     return undef
00386         if $here < 0;  # error in tell()
00387     
00388     return undef
00389         unless $self->{handle}->seek(0, SEEK_END);
00390     
00391     my  $end  = $self->{handle}->tell;
00392     
00393     $self->{handle}->seek($here, SEEK_SET),
00394     return undef
00395         if $end < 0;    # error in tell()
00396 
00397     my  $low  = $here;
00398     my  $high = $end;
00399     
00400   BSearch:  # binary search for block where we should start parsing
00401     while ($high - $low > $granule)
00402         {
00403         # There's some potential payoff to skipping ahead:
00404         my  $mid  = ($low + $high) / 2;
00405 #!#     Log->trace('High:  ', $high);
00406 #!#     Log->trace('Mid:   ', $mid);
00407 #!#     Log->trace('Low:   ', $low);
00408         
00409         last unless $self->{handle}->seek($mid, SEEK_SET);
00410         
00411         # Trash the first partial line:
00412         my  $line = $self->{handle}->getline;
00413         
00414         # Memorize this point in the file in case it's good:
00415         last if ($mid = $self->{handle}->tell) < 0;
00416         
00417         while ($line = $self->{handle}->getline)
00418             {
00419             my  @data = $self->parse($line);
00420             
00421             if ($data[1] eq '+')
00422                 {
00423                 # Left-over from previous message block,
00424                 #   trash this entire line, memorize new point:
00425                 last BSearch if ($mid = $self->{handle}->tell) < 0;
00426                 }
00427 
00428             elsif ($data[0] < $now)
00429                 {
00430                 # This one's too far in the past,
00431                 #   so it's safe to move up this far!:
00432                 $low = $mid;
00433                 last
00434                 }
00435             
00436             else
00437                 {
00438                 # Still too far in the future,
00439                 #   or dead on but that still means go back:
00440                 $high = $mid;
00441                 last
00442                 }
00443             }
00444         }
00445     
00446     # Payoff!  Here's where we get to jump ahead
00447     #   to the last known good point:
00448     $self->{handle}->seek($low, SEEK_SET);
00449     # (doesn't matter if $low is what used to be $here,
00450     #  we've been moving the file pointer and have to
00451     #  reset it to $here if there is no $low anyway)
00452 
00453     Log->info ('Skipped ', $low - $here, ' bytes in ', $self->{name})
00454         unless $low == $here;
00455 
00456 #!# Log->trace('Skipped nowhere') if     $low == $here;
00457     1
00458     }
00459 
00460 1

Generated on Mon Dec 27 2010 15:15:42 for MMAgic Demo by  doxygen 1.7.1

www.dimension.org logo

(C)opyright 1998 - 2012 Dimension.org

WebMaster