#!/usr/bin/perl -w
#
# qrm2          --crb3 12oct03/..26aug06/01apr07
#
# workspace = demonhouse:/home/crb3/crb3/perl/s/spam/qrm2/qrm2
#
# --crb3 05Feb07: lockfile added.
# --crb3 01Apr07: rc read-in speedup
#
$chatty=1;
$debug=0;
$safety=0;

#
# build a quick lookup-and-locate hash for RC elements. the 'user='
# entries are sucked in but otherwise ignored. that was an explicit
# list for the first 'qrm' of what user Maildirs to check; qrm2
# iterates over the Maildir system instead.
#
foreach $el (
qw/user skipto skipfrom skipsubj skipreceived
   rexskipto rexskipfrom rexskipsubj rexskipreceived
   to from received subj body returnpath skipreturnpath rexreturnpath
   rexskipreturnpath rexto rexfrom rexrecieved rexsubj rexbody/ ) {
  if(index($el,'body')>-1){
#   next unless defined($el) and length($el);
   ($com = $el) =~ s/body/bodies/;
  }else{
    $com = $el . 's';
  }
  $lkup{$el}=$com;
}

$dolock=1;

$starttime=time();      # snag startup time for runtime display
$timenow=`/bin/date -u`;
#$timenow =~ s/\:/\\:/g;
chomp $timenow;

#($pname=$0) =~ s/^.*\///;      # shave off ./ etc

$reportname="/var/local/qrm/qrm2log";

$pname="qrm";           # ** SHORT-CIRCUIT so cfg's are "qrmrc" for dev/debug

$homebase="/var/local/$pname";  # configfile names
$cfg="$homebase/".$pname."rc";
$localcfg=".".$pname."rc";

$lock="$homebase/qrm2.lock";

($pname=$0) =~ s/^.*\///;       # shave off ./ etc **

$basedir="home";

$mailtohome="stormbringer.org";

$logger="/usr/bin/logger";              # this works in linux
$loglvl="alert";
$logcmd="$logger -p mail.$loglvl -t $pname ";

if($dolock){
  if(-e $lock){
    loggit("found lockfile; going home.");
    exit(1);
  }
  `touch $lock`;
}

$check=0;               # count of mails checked systemwide
$spams=0;
$goods=0;

$notrailers=1;          # don't allow trailers (Subject: subjsubj       trailer)
$trailercount=6;        # how many spaces within subj is too many?

if(readrc(\%rc,$cfg)==-1){      # read in global cfg
  loggit("can't open configfile $cfg");
  die "$pname: can't open configfile $cfg\n";
}

$cmtmax=5;              # how many <!--xxx--> comments is too many?
$clrmax=5;              # how many <font color= is too many?

$cmtmax=$rc{cmtmax} if exists $rc{cmtmax};
$clrmax=$rc{clrmax} if exists $rc{clrmax};
$chatty=$rc{chatty} if exists $rc{chatty};
$debug=$rc{debug} if exists $rc{debug};

chdir("/$basedir");     # move here once and scoop
for (<*>){              # up all users so we don't have to
  next unless -d;       # keep seeking back here after
  push(@users,$_);      # each pass
}

$trailercount=$rc{trailercount} if exists $rc{trailercount};

foreach $user (@users){
  print "/$basedir/$user...\n" if $chatty;
  $qrms{$user}=$checks{$user}=$oks{$user}=0;
  peruser(\%rc,$user);
}

if($chatty){
  if($check){
    foreach $user (@users){
      if($checks{$user}){
        loggit("$user".$checks{$user}." check".
          ($checks{$user} > 1 ? "s" : "") . ""
          .$oks{$user}." oks, ".$qrms{$user}." qrms.");
      }
    }
  }
}

system("echo \"$timenow $check chk $spams qrm $goods new\" >>$reportname")
   if $check;

$stoptime=time();
$elapsed=$stoptime-$starttime;

loggit( ($check ? "" : "no new mail for checks. ")
                . "run completed, $elapsed sec.");

`rm -f $lock` if $dolock;


#---------------------------------
#
# peruser.
# process the specific user's cfgfile and mail.
# returns -2: error (no /chk);
#         -1: run, no local .rc;
#          0: run=no;
#          1: run w/ local .rc.
#
sub peruser {
  my($rc,$user)=(@_);   # \%rc, $"user
  my $cfg="/$basedir/$user/$localcfg";
  my(%lrc);
#
# pick up this usr's cfg. doesn't exist? use just global.
# no Maildir/chk? skip. cfg exists and has "run=no" ? skip.
#
  if(-f $cfg){
    return(0unless ($retcode=readrc(\%lrc,$cfg));
  }
#
# iterate through mails.
# first, regenerate dir-string globals per user.
#
  $chkdir="/$basedir/$user/Maildir/chk";
  $qrmdir="/$basedir/$user/Maildir/qrm";
  $newdir="/$basedir/$user/Maildir/new";
  $mailto="$user\@$mailtohome";

  return(-1unless chdir($chkdir);     # dir doesn't exist? go away.
  for (<*>){
    next unless -f;
    permail($rc,\%lrc,$user,$_);
  }
  return($retcode);
}

#
# permail.
# per-mail function.
#
# returns -2: error. -1: good. 0: don't-know. 1: spam.
# mail which is 0, don't-know, is untouched, left in /chk for
# spamprobe to test. -1 good mail is moved into /new. 1 spam mail
# is moved into /qrm.
#
sub permail {
  my($rc,$lrc,$user,$msg)=(@_);

  loggit("$user: testing $msg"if $chatty;
  unless(open(M,"<$msg")){
    loggit("$user: can't open $msg for test");
    warn "$pname$user can't open $msg for test\n";
    return(-2);
  }
  $checks{$user}++;
  $check++;
  $qrm=0;               # is this file QRM(1)? good(-1)?
  $cmtcnt=$clrcnt=0;
  $inheader=1;          # one-bit statecount
  print "$user $msg " if $debug;
  while(defined($inline=<M>)){     # go thru msg line by line
    chomp $inline;
    $inline =~ s/\?\n//;
    if($inline =~ /^$/){
      print "-------end of headers, next is body--------\n"
                    if $debug and $inheader;
      $inheader=0;
      next;
    }
    if($inheader){                      # one-bit state
      if($inline =~ /^\S+\:\s/){        # header line
        ($field,$arg)=split(/\:\s/,$inline,2);
        $field=lc($field);
        $arg=lc($arg);
      }else{                            # fieldname persists
        ($arg=$inline) =~ s/^\s+//;
      }
      print "field=$field arg=$arg\n" if $debug;
      if($field eq "to" or $field eq "cc"){
        print "skipto, " if $debug;     # whitelist filter.

        $gp=$rc->{rexskiptos};          # get pointers to arrays
        $lp=$lrc->{rexskiptos};         #  within hashes
        foreach $skipto (@$gp,@$lp){    # an apparent mail-to-self
          if($arg =~ /$skipto/){        # is exempt to exemption;
            $qrm=-1;                    # spammers do that a lot.
            if($chatty){
              loggit("$user: skipped: $skipto rx $arg");
            }
            if($debug){
              print "$user: skipped: $skipto rx $arg\n";
            }
            $why="skip: $skipto rx to or cc";
            goto SHORTCIRCUIT;
          }
        }
        $gp=$rc->{skiptos};             # get pointers to arrays
        $lp=$lrc->{skiptos};            #  within hashes
        foreach $skipto (@$gp,@$lp){    # an apparent mail-to-self
          if(index($arg,$skipto)>-1){   # is exempt to exemption;
            $qrm=-1;                    # spammers do that a lot.
            if($chatty){
              loggit("$user: skipped: $skipto in $arg");
            }
            if($debug){
              print "$user: skipped: $skipto in $arg\n";
            }
            $why="skip: $skipto in to or cc";
            goto SHORTCIRCUIT;
          }
        }

        print "to, " if $debug;
        $gp=$rc->{rextos}; $lp=$lrc->{rextos};
        foreach $to (@$gp,@$lp){
          if($arg =~ /$to/){
            $qrm=1;
            $why="$to rx to";
            print "$pname $user: qrm $msg $why\n" if $debug;
            goto SHORTCIRCUIT;
          }
        }
        $gp=$rc->{tos}; $lp=$lrc->{tos};
        foreach $to (@$gp,@$lp){
          if(index($arg,$to)>-1){
            $qrm=1;
            $why="$to in to";
            print "$pname $user: qrm $msg $why\n" if $debug;
            goto SHORTCIRCUIT;
          }
        }

      }elsif($field eq "return-path"){

        $gp=$rc->{rexskipreturnpaths};          # get pointers to arrays
        $lp=$lrc->{rexskipreturnpaths};         #  within hashes
        foreach $skipreturnpath (@$gp,@$lp){    # an apparent mail-to-self
          if($arg =~ /$skipreturnpath/){        # is exempt to exemption;
            $qrm=-1;                    # spammers do that a lot.
            if($chatty){
              loggit("$user: skipped: $skipreturnpath rx $arg");
            }
            if($debug){
              print "$user: skipped: $skipreturnpath rx $arg\n";
            }
            $why="skip: $skipreturnpath rx returnpath";
            goto SHORTCIRCUIT;
          }
        }
        $gp=$rc->{skipreturnpaths};             # get pointers to arrays
        $lp=$lrc->{skipreturnpaths};            #  within hashes
        foreach $skipreturnpath (@$gp,@$lp){    # an apparent mail-to-self
          if(index($arg,$skipreturnpath)>-1){   # is exempt to exemption;
            $qrm=-1;                    # spammers do that a lot.
            if($chatty){
              loggit("$user: skipped: $skipreturnpath in $arg");
            }
            if($debug){
              print "$user: skipped: $skipreturnpath in $arg\n";
            }
            $why="skip: $skipreturnpath in returnpath";
            goto SHORTCIRCUIT;
          }
        }

        $gp=$rc->{rexreturnpaths}; $lp=$lrc->{rexreturnpaths};
        foreach $returnpath (@$gp,@$lp){
          if($arg =~ /$returnpath/){
            $qrm=1;
            $why="$returnpath rx returnpath";
            print "$pname $user: qrm $msg $why\n" if $debug;
            goto SHORTCIRCUIT;
          }
        }
        $gp=$rc->{returnpaths}; $lp=$lrc->{returnpaths};
        foreach $returnpath (@$gp,@$lp){
          if(index($arg,$returnpath)>-1){
            $qrm=1;
            $why="$returnpath in returnpath";
            print "$pname $user: qrm $msg $why\n" if $debug;
            goto SHORTCIRCUIT;
          }
        }




      }elsif($field eq "from"){
        if(index($arg,$mailto)<0){
          print "skipfrom, " if $debug;         # whitelist filter.
          $gp=$rc->{rexskipfroms}; $lp=$lrc->{rexskipfroms};
          foreach $skipfrom (@$gp,@$lp){        # an apparent mail-to-self
            if($arg =~ /$skipfrom/){            # is exempt from exemption;
              $qrm=-1;                          # spammers do that a lot.
              if($chatty){
                loggit("$user: skipped: $skipfrom rx $arg");
              }
              if($debug){
                print "$user: skipped: $skipfrom rx $arg\n";
              }
              $why="skip: $skipfrom rx from";
              goto SHORTCIRCUIT;
            }
          }
          $gp=$rc->{skipfroms}; $lp=$lrc->{skipfroms};
          foreach $skipfrom (@$gp,@$lp){        # an apparent mail-to-self
            if(index($arg,$skipfrom)>-1){       # is exempt from exemption;
              $qrm=-1;                          # spammers do that a lot.
              if($chatty){
                loggit("$user: skipped: $skipfrom in $arg");
              }
              if($debug){
                print "$user: skipped: $skipfrom in $arg\n";
              }
              $why="skip: $skipfrom in from";
              goto SHORTCIRCUIT;
            }
          }
        }
        print "from, " if $debug;
        $gp=$rc->{rexfroms}; $lp=$lrc->{rexfroms};      # grab pointers, so we can
        foreach $from (@$gp,@$lp){              #  unroll as one list.
          if($arg =~ /$from/){
            $qrm=1;
            $why="$from rx from";
            print "$pname $user: qrm $msg $why\n" if $debug;
            goto SHORTCIRCUIT;
          }
        }
        $gp=$rc->{froms}; $lp=$lrc->{froms};    # grab pointers, so we can
        foreach $from (@$gp,@$lp){              #  unroll as one list.
          if(index($arg,$from)>-1){
            $qrm=1;
            $why="$from in from";
            print "$pname $user: qrm $msg $why\n" if $debug;
            goto SHORTCIRCUIT;
          }
        }

      }elsif($field eq "received"){
        if(index($arg,$mailto)<0){      # send-to-self is exempt from exemption.
          print "skipreceived, " if $debug;     # whitelist filter.
          $gp=$rc->{rexskipreceiveds}; $lp=$lrc->{rexskipreceiveds};
          foreach $skipreceived (@$gp,@$lp){
            if($arg =~ /$skipreceived/){
              $qrm=-1;
              if($chatty){
                loggit("$user: skipped: $skipreceived rx $arg");
              }
              if($debug){
                print "$user: skipped: $skipreceived rx $arg\n";
              }
              $why="skip: $skipreceived rx received";
              goto SHORTCIRCUIT;
            }
          }
          $gp=$rc->{skipreceiveds}; $lp=$lrc->{skipreceiveds};
          foreach $skipreceived (@$gp,@$lp){
            if(index($arg,$skipreceived)>-1){
              $qrm=-1;
              if($chatty){
                loggit("$user: skipped: $skipreceived in $arg");
              }
              if($debug){
                print "$user: skipped: $skipreceived in $arg\n";
              }
              $why="skip: $skipreceived in received";
              goto SHORTCIRCUIT;
            }
          }
        }
        print "received, " if $debug;
        $gp=$rc->{rexreceiveds}; $lp=$lrc->{rexreceiveds};
        foreach $received (@$gp,@$lp){
          if($arg =~ /$received/){
            $qrm=1;
            $why="$received rx received";
            print "$pname $user: qrm $msg $why\n" if $debug;
            goto SHORTCIRCUIT;
          }
        }
        $gp=$rc->{receiveds}; $lp=$lrc->{receiveds};
        foreach $received (@$gp,@$lp){
          if(index($arg,$received)>-1){
            $qrm=1;
            $why="$received in received";
            print "$pname $user: qrm $msg $why\n" if $debug;
            goto SHORTCIRCUIT;
          }
        }

      }elsif($field eq "subject"){
        print "skipsubj, " if $debug;   # whitelist filter.
        $gp=$rc->{rexskipsubjs}; $lp=$lrc->{rexskipsubjs};
        foreach $skipsubj (@$gp,@$lp){  # an apparent mail-to-self
          if($arg =~ /$skipsubj/){      # is exempt from exemption;
            $qrm=-1;                    # spammers do that a lot.
            if($chatty){
              loggit("$user: skipped: $skipsubj rx $arg");
            }
            if($debug){
              print "$user: skipped: $skipsubj rx $arg\n";
            }
            $why="skip: $skipsubj rx subj";
            goto SHORTCIRCUIT;
          }
        }
        $gp=$rc->{skipsubjs}; $lp=$lrc->{skipsubjs};
        foreach $skipsubj (@$gp,@$lp){  # an apparent mail-to-self
          if(index($arg,$skipsubj)>-1){ # is exempt from exemption;
            $qrm=-1;                    # spammers do that a lot.
            if($chatty){
              loggit("$user: skipped: $skipsubj in $arg");
            }
            if($debug){
              print "$user: skipped: $skipsubj in $arg\n";
            }
            $why="skip: $skipsubj in subj";
            goto SHORTCIRCUIT;
          }
        }
        print "subject, " if $debug;
        if($notrailers and $arg =~ /[\s\.\-\=]{$trailercount,}\w+/o){
          $qrm=1;
          $why="*trailer* $arg";
            print "$pname $user: qrm $msg $why\n" if $debug;
          goto SHORTCIRCUIT;
        }
        $gp=$rc->{subjs}; $lp=$lrc->{subjs};
        foreach $subj (@$gp,@$lp){
          if(index($arg,$subj)>-1){
            $qrm=1;
            $why="$subj in subj";
            print "$pname $user: qrm $msg $why\n" if $debug;
            goto SHORTCIRCUIT;
          }
        }
      }else{
        print "header $field" if $debug;
      }

    }else{                            # not header: body line
      print "$inline\n" if $debug;
      $inline=lc($inline);
      if($inline =~ /\<!/ or $inline =~ /\<.+\/\>/){
        $cmtcnt++ while $inline =~ /\<\!.+?\>/g;
        $cmtcnt++ while $inline =~ /\<\w+\s+\/\>/g;
        if($cmtcnt >= $cmtmax){
          $qrm=1;
          $why="html comments";
          print "$pname $user: qrm $msg $why\n" if $debug;
          goto SHORTCIRCUIT;
        }
      }
      if($inline =~ /font\s+color/i){
        $clrcnt++ while $inline =~ /font\s+color/gi;
        if($clrcnt >= $clrmax){
          $qrm=1;
          $why="fontcolor shifts";
          print "$pname $user: qrm $msg $why\n" if $debug;
          goto SHORTCIRCUIT;
        }
      }

# --crb3 26Aug06: add rexbody process (entry already in)

      $gp=$rc->{rexbodies}; $lp=$lrc->{rexbodies};
      foreach $body (@$gp,@$lp){
        if($arg =~ /$body/){
          $qrm=1;
          $why="$body rx body";
          print "$pname $user: qrm $msg $why\n" if $debug;
          goto SHORTCIRCUIT;
        }
      }

      $gp=$rc->{bodies}; $lp=$lrc->{bodies};
      foreach $body (@$gp,@$lp){
        if(index($inline,$body)>-1){
          $qrm=1;
          $why="$body in body";
          print "$pname $user: qrm $msg $why\n" if $debug;
          goto SHORTCIRCUIT;
        }
      }
    }
  }
  return(0if $qrm==0;                 # leave it alone.
SHORTCIRCUIT:
  print ".\n" if $debug;
  close(M);
  $msgname = $msg;
  $msgname =~ s/^\.+\///;             # shave to name alone
  if($qrm ==1){
    $qrms{$user}++; $spams++;
    loggit("$user$chkdir/$msg ($why)");

    $mvname = "$qrmdir/$msgname";
    loggit("$user: mv $msg $mvname $why"if $chatty;
    print "$user: mv $msg $mvname $why\n" if $debug;
    unless($safety){
      loggit("$user: mv failed"unless rename($msg,$mvname);
    }else{
      loggit("$user: safety on.");
    }
  }elsif($qrm == -1){                 # -1: good stuff.
    $oks{$user}++; $goods++;
    if($chatty){
      loggit("$user$msg not qrm.");
    }
    if($debug){
      print "$user$msg not qrm.\n";
    }
    $mvname = "$newdir/$msgname";
    loggit("$user$chkdir/$msg ($why)");
    loggit("$user: mv $msg $mvname $why"if $chatty;
    print "$user: mv $msg $mvname $why\n" if $debug;
    unless($safety){
      loggit("$user: mv failed"unless rename($msg,$mvname);
    }else{
      loggit("$user: safety on.");
    }
  }
}


#
# readrc.
# read in and process a cfgfile into ref'd struct.
# script will always parse the cfgfile even if 'run=no'
# keeps it from being run.
# retcode -1: no cfg. 0: no-go. 1: go.
#
sub readrc {
  my($lrc,$cfg)=(@_);   # \%lrc, $"cfg
  my $retc=1;

  open(F,"$cfg"or return(-1);
  while(defined($ln=<F>and index($ln,'__END__') <0){
    chomp $ln;
    $ln =~ s/\s+[\#\;].*//;    # whack off trailing comments
    next if $ln =~ /^\s*[\#\;]/ or $ln =~ /^\s*$/;
    ($key,$arg)=split('=',$ln,2);
    $key=lc($key); $arg=lc($arg);
    if($key eq "all"){        # look for it in all fields
      push@$lrc->{bodies} },$arg);
      push@$lrc->{subjs} },$arg);
      push@$lrc->{froms} },$arg);
      push@$lrc->{tos} },$arg);
      push@$lrc->{receiveds} },$arg);
    }elsif($key eq "rexall"){        # look for it in all fields
      push@$lrc->{rexbodies} },$arg);
      push@$lrc->{rexsubjs} },$arg);
      push@$lrc->{rexfroms} },$arg);
      push@$lrc->{rextos} },$arg);
      push@$lrc->{rexreceiveds} },$arg);
    }elsif($key eq "chatty"){
      $lrc->{chatty}=$arg;
    }elsif($key eq "trailercount"){
      $lrc->{trailercount}=$arg;
    }elsif($key eq "debug"){
      $lrc->{debug}=$arg;
    }elsif($key eq "notrailers"){
      $lrc->{notrailers}=$arg;
    }elsif($key eq "clrmax"){
      $lrc->{clrmax}=$arg;
    }elsif($key eq "cmtmax"){
      $lrc->{cmtmax}=$arg;
    }elsif($key eq "run"){
      $lrc->{run}=$arg;
      $retc=($arg eq "no" ? 0 : 1);
    }else{
      if(exists($lkup{$key})){
        push@$lrc->{ $lkup{$key} } },$arg);
      }else{
        print "$pname: unrecognized config line: $ln\n";
        loggit("unrecognized config line: $ln");
      }
    }
  }
  close(F);
  $lrc->{is}=1;         # needed? an explicitly-exists flag?

  if$debug or (exists $lrc->{debug} and $lrc->{debug} eq "1")){
    use Data::Dumper;
    print "rc=$lrc".Dumper($lrc)."\n";
  }
  return($retc);
}

#
# loggit.
#
# send a line to /var/log/maillog.
#
sub loggit {
  my $msg=shift(@_);
  $msg =~ s/[^a-zA-Z0-9_ :,.\@\/\[\]]/./g;        # safety most stuff to a dot
  CORE::system("$logcmd $msg");
}

Grab the
tarball
here

See the
sample
rcfile
here

 
Syntax highlighting using Syntax::Highlight::Engine::Kate