#!/usr/bin/perl -Tw

=head1 NAME

dspam -- DSPAM spam scanner plugin for qpsmtpd

=head1 DESCRIPTION

This plugin scans incoming mail with the DSPAM anti-spam scanner.

=head1 LICENSE

This plugin is licensed under the same terms as the qpsmtpd package itself.
Please see the LICENSE file included with qpsmtpd for details.

=cut
 
use strict;
use warnings;
 
sub register {
  my ($self, $qp, @args) = @_;
  my %args;

  if ($args[0] && $args[0] =~ /^(\/[\/\-\_\.a-z0-9A-Z]*)$/ && -x $1) {
    $self->{_dspam_loc} = $1;
    shift @args;
  }

  for (@args) {
    if (/^max_size=(\d+)$/) {
        $self->{_max_size} = $1;
    }
    elsif (/^dspam_path=(\/[\/\-\_\.a-z0-9A-Z]*)$/) {
        $self->{_dspam_loc} = $1;
    }
    elsif (/^dspam_conf=(\/[\/\-\_\.a-z0-9A-Z]*)$/) {
        $self->{_dspam_conf} = "$1";
    }
    elsif (/^tmp_dir=(\/[\/\-\_\.a-z0-9A-Z]*)$/) {
        $self->{_spool_dir} = $1;
    }
    elsif (/^action=(train|tag)$/) {
        $self->{_action} = $1;
    }
    elsif (/^spamlevel=(\d+)$/) {
      $self->{_spamlevel} = $1;
    }
    elsif (/^hamlevel=(\d+)$/) {
      $self->{_hamlevel} = $1;
    }
    elsif (/^keepheaders=(yes|no)$/) {
      $self->{_keepheaders} = $1;
    }

    else {
        $self->log(LOGERROR, "Unrecognized argument '$_' to dspam plugin");
        return undef;
    }
  }

  $self->{_spamlevel} ||= 9;    # Which level to train as spam (score > this)
  $self->{_hamlevel} ||= 5;     # Which level to train as ham (score < this)
  $self->{_action} ||= 'train'; # train or tag
  $self->{_keepheaders} = 'no';

  $self->{_dspam_user} ||= 'qpsmtpd';
  $self->{_dspam_loc} ||= '/usr/bin/dspam';
  $self->{_max_size} ||= 512 * 1024;
  $self->{_spool_dir} ||= $self->spool_dir();
  $self->{_dspam_conf} ||= '/etc/dspam/dspam.conf'; # make sure something is set

  unless ($self->{_spool_dir}) {
        $self->log(LOGERROR, "No spool dir configuration found");
        return undef;
  }
  unless (-d $self->{_spool_dir}) {
        $self->log(LOGERROR, "Spool dir $self->{_spool_dir} does not exist");
        return undef;
  }
}

sub hook_data_post {
  my ($self, $transaction) = @_;

  classify_dspam($self,$transaction);

  return (DECLINED);
}


sub hook_deny {
  my ( $self, $transaction, $prev_hook, $retval, $return_text ) = @_;

  # Now if SpamAssassin was the previous hook and it denied
  # then its likely spam and we want to check results and
  # maybe learn from it.

  if ($prev_hook eq 'spamassassin') {

    # If we are training its based on SpamAssassin results
    my $flag  = $transaction->header->get('X-Spam-Flag');
    my $spam = 0;
    if (defined($flag) and uc($flag) =~ m/YES/) {
      $spam = 1;
    }

    # Get SpamAssassin Score
    my $score = get_spam_score($self,$transaction);
    
    if ($self->{_action} eq 'train') {
      if (defined($score) and $spam == 1 and $score > $self->{_spamlevel}) {
	$self->log(LOGINFO, "Training email as spam ($score > $self->{_spamlevel})");
	train_dspam($self,$transaction,'spam');
      }
    }

    if ($self->{_action} eq 'tag') {
      
      my $result = $transaction->notes('X-DSPAM-Result');
      
      #   my $result  = $transaction->header->get('X-DSPAM-Result');
      
      # Now if overall result was SPAM and DSPAM tagged as HAM
      # then relearn. Also do the same if overall result was HAM and DSPAM
      # tagged as SPAM
      
      
      if (defined($result) and defined($score)) {
	if ($result =~ m/Innocent/ and $score > $self->{_spamlevel}) {
	  $self->log(LOGINFO, "Retraining email as spam classification ($score > $self->{_spamlevel})");
	  train_dspam($self,$transaction,'spam','error');

	# not like to get here.....  
	} elsif ($result =~ m/Spam/ and $score < $self->{_hamlevel}) {
	  $self->log(LOGINFO, "Retraining email as spam classification ($score < $self->{_hamlevel})");
	  train_dspam($self,$transaction,'innocent','error');
	}
      }
      
      # Now get rid of header which are no longer needed.
      if ($self->{_keepheaders} eq 'no') {
	$transaction->header->delete('X-DSPAM-Result');
	$transaction->header->delete('X-DSPAM-Confidence');
	$transaction->header->delete('X-DSPAM-Probability');
	$transaction->header->delete('X-DSPAM-Signature');
      }
    }
  }

  return DECLINED;
}

sub hook_queue {
  my ( $self, $transaction ) = @_;

  # I do assume that SpamAssassin has been active before queuing
  my $score = get_spam_score($self,$transaction);

  if ($self->{_action} eq 'train') {
    if (defined($score) and $score < $self->{_hamlevel}) {
      $self->log(LOGINFO, "Training email as innocent ($score < $self->{_hamlevel})");
      train_dspam($self,$transaction,'innocent');
    }
  }
  
  if ($self->{_action} eq 'tag') {

    my $result = $transaction->notes('X-DSPAM-Result');
#    my $result  = $transaction->header->get('X-DSPAM-Result');

    # Now if overall result was SPAM and DSPAM tagged as HAM 
    # then relearn. Also do the same if overall result was HAM and DSPAM
    # tagged as SPAM

    if (defined($result)) {
      if ($result =~ m/Innocent/ and $score > $self->{_spamlevel}) {
	$self->log(LOGINFO, "Retraining email as spam ($score > $self->{_spamlevel})");	  
	# retrain DSPAM as spam
	train_dspam($self,$transaction,'spam','error');
      } elsif ($result =~ m/Spam/ and $score < $self->{_hamlevel}) {
	$self->log(LOGINFO, "Retraining email as ham ($score < $self->{_hamlevel})");	  
	# retrains DSPAM as ham
	train_dspam($self,$transaction,'innocent','error');
      }
    }

    # Now get rid of header which are no longer needed.    
    if ($self->{_keepheaders} eq 'no') {
      $transaction->header->delete('X-DSPAM-Result');
      $transaction->header->delete('X-DSPAM-Confidence');
      $transaction->header->delete('X-DSPAM-Probability');
      $transaction->header->delete('X-DSPAM-Signature');
    }
  }

  return DECLINED;
}

sub get_spam_score {
  my ($self, $transaction) = @_;
  my $status  = $transaction->header->get('X-Spam-Status') or return;
  my ($score) = ($status =~ m/hits=(-?\d+\.\d+)/)[0];
  return $score;
}


sub train_dspam {

  my ($self, $transaction, $class, $source) = @_;

  my $filename = $transaction->body_filename;
  unless (defined $filename) {
    $self->log(LOGWARN, "didn't get a filename");
    return DECLINED;
  }
  my $mode = (stat($self->{_spool_dir}))[2];
  if ( $mode & 07077  ) { # must be sharing spool directory with external app
    # $self->log(LOGWARN, "Changing permissions on file to permit DSPAM access");
    chmod $mode, $filename;
  }

  my $cmd;

  # Set the default value
  if (not defined($source)) {
    $source ='corpus';
    $cmd = sprintf("/bin/cat %s | %s --user %s --client --mode=teft --class=%s --source=%s 2>&1",$filename,$self->{_dspam_loc},$self->{_dspam_user},$class,$source);
  } elsif ($source eq 'error') {
    # Here we only end if we have to re-train
    my $signature = $transaction->notes('X-DSPAM-Signature');
    $cmd = sprintf("/bin/cat %s | %s --user %s --client --mode=teft --class=%s --source=%s --signature=%s 2>&1",$filename,$self->{_dspam_loc},$self->{_dspam_user},$class,$source,$signature);
  }

  $self->log(LOGDEBUG, "Running: $cmd");
  my $output = `$cmd`;

  my $result = ($? >> 8);
  my $signal = ($? & 127);

  chomp($output);

  if ($output ne '') {
    $self->log(LOGINFO, "dspam results: $output");
  }

  return;
}

sub classify_dspam {
  my ($self, $transaction) = @_;

  if ($self->{_action} ne 'tag') {
    #$self->log(LOGINFO, "DSPAM: not in Tag mode");
    return (DECLINED);
  }

  if ($transaction->data_size > $self->{_max_size}) {
    $self->log(LOGWARN, 'Mail too large to scan ('.
	       $transaction->data_size . " vs $self->{_max_size})" );
    return (DECLINED);
  }
  
  my $filename = $transaction->body_filename;
  unless (defined $filename) {
    $self->log(LOGWARN, "didn't get a filename");
    return DECLINED;
  }

  my $mode = (stat($self->{_spool_dir}))[2];
  if ( $mode & 07077  ) { # must be sharing spool directory with external app
    #$self->log(LOGWARN, "Changing permissions on file to permit scanner access");
    chmod $mode, $filename;
  }
  
  my $cmd = sprintf("/bin/cat %s | %s --user %s --client --process --deliver=summary 2>&1",$filename,$self->{_dspam_loc},$self->{_dspam_user});

  $self->log(LOGDEBUG, "Running: $cmd");
  my $output = `$cmd`;

  my $result = ($? >> 8);
  my $signal = ($? & 127);

  chomp($output);

  if ($output ne '') {
    $self->log(LOGDEBUG, "dspam results: $output");
  }

  if ($signal) {
    $self->log(LOGINFO, "dspam exited with signal: $signal");
    return (DENYSOFT);
  }

#  $self->log(LOGINFO, "dspam results: $output");

  # Lets have a look a the result headers.
  # X-DSPAM-Result: qpsmtpd; result="Innocent"; class="Innocent"; probability=0.0000; confidence=0.99; signature=N/A

  my ($dspamResult) = $output =~ m/result=\"([^\"]+)\"/;
  my ($dspamProbability) = $output =~ m/probability=([^;]+);/;
  my ($dspamConfidence) = $output =~ m/confidence=([^;]+);/;
  my ($dspamSignature) = $output =~ m/signature=(.*)/;

    if (not defined($dspamResult)
        or not defined($dspamConfidence)
        or not defined($dspamProbability)) {

      $self->log(LOGINFO, "dspam provided no headers ($output)");
    } else {
      $self->log(LOGINFO, "dspam result: $dspamResult with Confidence of $dspamConfidence and Probability of $dspamProbability ($dspamSignature)");

      $transaction->header->add('X-DSPAM-Result', $dspamResult, 0);
      $transaction->header->add('X-DSPAM-Confidence', $dspamConfidence, 0);
      $transaction->header->add('X-DSPAM-Probability', $dspamProbability, 0);
      $transaction->header->add('X-DSPAM-Signature', $dspamSignature, 0);
      
      # Set a note that the headers are there
      $transaction->notes('X-DSPAM-Result', $dspamResult);
      $transaction->notes('X-DSPAM-Signature', $dspamSignature);

    }
}
 
1;

