#! /usr/bin/perl -wT

# get local DCC parameters for DCC whitelist CGI scripts.

# Copyright (c) 2006 by Rhyolite Software, LLC
#
# This agreement is not applicable to any entity which sells anti-spam
# solutions to others or provides an anti-spam solution as part of a
# security solution sold to other entities, or to a private network
# which employs the DCC or uses data provided by operation of the DCC
# but does not provide corresponding data to other users.
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# Parties not eligible to receive a license under this agreement can
# obtain a commercial license to use DCC and permission to use
# U.S. Patent 6,330,590 by contacting Commtouch at http://www.commtouch.com/
# or by email to nospam@commtouch.com.
#
# A commercial license would be for Distributed Checksum and Reputation
# Clearinghouse software.  That software includes additional features.  This
# free license for Distributed Checksum Clearinghouse Software does not in any
# way grant permission to use Distributed Checksum and Reputation Clearinghouse
# software
#
# THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
# BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
# OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
# SOFTWARE.
#	Rhyolite Software DCC 1.3.45-1.64 $Revision$
#	Generated automatically from common.in by configure.

# check this file by running it separately
use strict 'subs';

use integer;

use 5.004;
use Fcntl qw(:DEFAULT :flock);
use POSIX qw(strftime);

# Quiet Perl taint checks (this file runs with -T) with a path that should
#   work everywhere for the few commands these scripts use.
$ENV{PATH}="/sbin:/bin:/usr/sbin:/usr/bin";


# Run check_user() while this file is being loaded and hand its result back
#   so this file can be used with do('/var/dcc/cgibin/common')
#   besides, check_user() must be called before html_head()
return check_user();



# Write a time stamped progress line to STDERR (the httpd error log)
#   when the "debug" query parameter is set.
#	$_[0] is the label for the line
#   The line contains the label, the wall clock time with milliseconds,
#   and the four values from times().
sub debug_time {
    return if (!$query{debug});

    my($secs, $usecs);

    # Use Time::HiRes instead of a raw gettimeofday() system call.
    #	The old pack("LL") buffer was only 8 bytes while struct timeval is
    #	16 bytes on LP64 systems, so the kernel wrote past its end.
    #	Time::HiRes also does not need the h2ph generated sys/syscall.ph.
    {
	local $@;			# do not disturb the caller's $@
	if (eval { require Time::HiRes; 1 }) {
	    ($secs, $usecs) = Time::HiRes::gettimeofday();
	} else {
	    # fall back to whole seconds
	    ($secs, $usecs) = (time(), 0);
	}
    }

    printf STDERR "%38s", $_[0];
    print STDERR strftime(" %X", localtime($secs));
    printf STDERR ".%03d", int($usecs/1000);
    printf STDERR " %.3f", $_ foreach times;
    print STDERR "\n";
}



# Print the HTTP response header followed by the opening of the HTML page
#   through the page heading naming global $user and $hostname.
#	$_[0] is the title of the web page
#	$_[1] is the next step in the re-login sequence; when it is not
#	    null a one second META refresh to that URL is added
sub html_head {
    my $title = $_[0];
    my $refresh_url = $_[1];

    print <<HTTP_AND_HEAD;
Content-type: text/html; charset=iso-8859-1
Expires: Thu, 01 Dec 1994 16:00:00 GMT
pragma: no-cache

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<HTML>
<HEAD>
    <TITLE>$title</TITLE>
    <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
    <META HTTP-EQUIV="Content-Style-Type" CONTENT="text/css">
HTTP_AND_HEAD

    if ($refresh_url) {
	print "<META HTTP-EQUIV=refresh content=\"1;url=$refresh_url\">\n";
    }

    print <<STYLE_AND_BODY;
    <STYLE type="text/css">
	<!--
	BODY {background-color:white; color:black}
	.warn {color:red}
	.mono {font-family:monospace}
	.small {font-size:smaller}
	.strong {font-weight:bolder}
	TABLE {white-space:nowrap}
	IMG.logo {width:6em; vertical-align:middle}
	ADDRESS {font-size:smaller}
	-->
    </STYLE>
</HEAD>
<BODY>
<H2>Control DCC Log and Whitelist for <EM>$user</EM> at $hostname</H2>

STYLE_AND_BODY
}



# Copy the page footer to the output.
#   The per-user file $user_dir/footer is tried first and then
#   /var/dcc/cgibin/footer.  If neither can be opened, a complaint is
#   written to STDERR and nothing is printed.
sub html_footer {
    my($text);

    unless (open(FOOTER, "$user_dir/footer")
	    || open(FOOTER, "/var/dcc/cgibin/footer")) {
	# complain to httpd-error.log
	print STDERR "open(/var/dcc/cgibin/footer): $!\n";
	return;
    }

    for (;;) {
	$text = <FOOTER>;
	last if (!defined($text));
	print $text;
    }
    close(FOOTER);
}



# URLs, partial <A HREF="... tags, the logout token, and the debug related
#   URL and form fragments shared by the page generating functions.
#   They are set by get_query().
local($list_log_url, $list_log_link, $list_msg_link,
      $edit_url, $edit_link, $passwd_url, $passwd_link, $logoutID,
      $url_ques, $url_suffix, $form_hidden);

# Start the table of navigation links found at the top of every page.
#   The entry for the current page is plain text instead of a link.
#   The <TABLE> is left open for the caller to add cells and close.
sub common_buttons {
    my($msg, $cur, $list_log, $edit, $passwd);


    # $query{msg} comes straight from the HTTP request and ends up inside
    #	an HREF attribute, so %-encode it to keep quotes and angle brackets
    #	from escaping the attribute.  Ordinary alphanumeric log message
    #	names are not changed by the encoding.
    $msg = $query{msg} ? "${url_ques}msg=" . url_encode($query{msg}) : "";

    $cur = "$ENV{SCRIPT_NAME}$url_suffix";
    $list_log = ($cur ne $list_log_url
		 ? "$list_log_link$msg\">Log</A>"
		 : "List Log");
    $edit = ($cur ne $edit_url
	     ? "$edit_link\">Settings</A>"
	     : "Settings");
    $passwd = ($cur ne $passwd_url
	       ? "$passwd_link\">Password</A>"
	       : "Password");

    print <<EOF;
<TABLE>
<TR><TD>$list_log
    <TD>$edit
    <TD>$passwd
    <TD><A HREF="$cur${url_ques}logoutID=$logoutID">LogOut/In</A>
EOF
}



# Give up with an "Internal Error" page that still has the usual
#   navigation links, and then exit.
#	$_[0] is the complaint shown on the page
sub html_whine {
    my $msg = $_[0];

    html_head("Internal Error");
    common_buttons();

    # close the table started by common_buttons() and finish the page
    print "</TABLE>\n"
	. "<H1>Internal Error</H1>\n"
	. "<P class=warn>$msg\n"
	. "<P><HR>\n"
	. "$ENV{SERVER_SIGNATURE}\n"
	. "</BODY>\n"
	. "</HTML>\n";
    exit;
}



# Give up with a bare "Internal Error" page without navigation links,
#   log the complaint, and exit.
#	$_[0] is the complaint
sub html_die {
    my $msg = $_[0];

    # put the message into the httpd error log
    print STDERR "DCC CGI script internal error: $msg\n";

    html_head("Internal Error");
    print "<P class=warn>$msg\n"
	. "<P><HR>\n"
	. "$ENV{SERVER_SIGNATURE}\n"
	. "</BODY>\n"
	. "</HTML>\n";
    exit;
}


# Redirect the browser to some other web page, perhaps after the
#   logout/in kludge, and exit.
#   This cannot be used after html_head().
#	$_[0] is the message saying why, passed along as result=...
#	$_[1] is the path of the other web page on this server
sub punt2 {
    my($msg, $url) = @_;
    my($scheme);

    # don't punt a punt
    if ($query{result}) {
	html_die($msg);
    }

    # make the URL absolute
    $scheme = "http://";
    $scheme = "https://" if ($ENV{HTTPS} && $ENV{HTTPS} eq "on");
    $url = $scheme . $ENV{SERVER_NAME} . $url;
    if ($msg) {
	$url .= $url_ques . "result=" . url_encode($msg);
    }

    print "Status: 302 Moved Temporarily\nLocation: $url\n";
    html_head("redirect to $url");
    print "redirecting to $url\n</BODY>\n</HTML>\n";
    exit;
}



# Check authentication and gather system parameters.
#   Require a user name as well as one that can't be used as a sneaky path.
#
#   Sets the globals $hostname, $user, the variables named in dcc_conf,
#	$main_whiteclnt, $logout_tmpdir, $user_dir, $logdir, $whiteclnt,
#	$sub_hdrs, $sub_white, $thold_cks_cmn, $thold_cks, %conf_cks_tholds,
#	and $cgibin, and then calls get_query().
#   Returns 1.  Failures do not return but end in html_die().
sub check_user {
    my($sub_args, $cks, $thold, $line);

    if ($ENV{HTTP_NAME}) {
	$hostname = $ENV{HTTP_NAME};
    } elsif ($ENV{SERVER_NAME}){
	$hostname = $ENV{SERVER_NAME};
    } else {
	$hostname=`hostname`;
	# chomp() instead of chop() to not eat a character of a name
	#   that lacks a trailing newline
	chomp($hostname);
    }

    $user = $ENV{REMOTE_USER};
    if (!$user){
	$user = '';
	html_die("no user name")
    }
    # allow the user name to be a subdirectory
    html_die("user name $user is invalid")
	if ($user =~ /\.\./ || $user !~ /^([-\/.,#_%a-z0-9]+)$/i);
    $user = $1;				# stop Perl taint warnings

    # convert the user name to lower case because sendmail likes to
    $user =~ tr/A-Z/a-z/;

    # rely on the /etc/dcc/dcc_conf configuration file for almost everything
    $DCC_HOMEDIR = "/etc/dcc";		# unneeded except for compatibility
    $DCCM_USERDIRS = "userdirs";
    $DCCM_ENABLE = "on";
    $DCCIFD_ENABLE = "off";
    open(CONF, '2>/dev/null sh -c \'. /etc/dcc/dcc_conf;
		echo DCCM_ENABLE="$DCCM_ENABLE";
		echo DCCM_USERDIRS="$DCCM_USERDIRS";
		echo DCCM_ARGS="$DCCM_ARGS";
		echo DCCM_REJECT_AT="$DCCM_REJECT_AT";
		echo DCCM_CKSUMS="$DCCM_CKSUMS";
		echo DCCIFD_USERDIRS="$DCCIFD_USERDIRS";
		echo DCCIFD_ENABLE="$DCCIFD_ENABLE";
		echo DCCIFD_ARGS="$DCCIFD_ARGS";
		echo DCCIFD_REJECT_AT="$DCCIFD_REJECT_AT";
		echo DCCIFD_CKSUMS="$DCCIFD_CKSUMS";
		echo GREY_CLIENT_ARGS="$GREY_CLIENT_ARGS";
		echo DNSBL_ARGS="$DNSBL_ARGS";
		echo XFLTR_ARGS="$XFLTR_ARGS";
		\'|')
	|| html_die("cannot get DCC configuration");
    while ($line = <CONF>) {
	chomp($line);
	# Split only at the first '=' so that values containing '='
	#   are not truncated.
	my($name, $val) = split(/=/, $line, 2);
	# The name becomes the name of a Perl global, so ignore anything
	#   that does not look like one of the names echoed above, such as
	#   the continuation of a value that contains a newline.
	next if (!defined($name) || $name !~ /^([A-Z][A-Z0-9_]*)$/);
	$name = $1;
	if (!defined($val) || $val eq '') {
	    ${$name} = "";
	} elsif ($val =~ /^([-0-9,.\/a-z_]*)$/i) {
	    ${$name} = $1;		# suppress taint warnings on good paths
	} else {
	    ${$name} = $val;
	}
    }
    close(CONF);

    $main_whiteclnt = "/etc/dcc/whiteclnt";
    if ($DCCM_ENABLE eq "off" && $DCCIFD_ENABLE eq "on") {
	$sub_args = $DCCIFD_ARGS;
	$cks = $DCCIFD_CKSUMS;
	$thold = $DCCIFD_REJECT_AT;
	$logout_tmpdir = "/etc/dcc/$DCCIFD_USERDIRS/tmp";
	# Assume "name" per-user directory for simple dccifd user names.
	$user_dir = "/etc/dcc/$DCCIFD_USERDIRS/$user";
    } else {
	$sub_args = $DCCM_ARGS;
	$cks = $DCCM_CKSUMS;
	$thold = $DCCM_REJECT_AT;
	$logout_tmpdir = "/etc/dcc/$DCCM_USERDIRS/tmp";
	# Assume "local/name" per-user directory for simple dccm user names.
	$user_dir = ($user =~ /\//) ? $user : "local/$user";
	$user_dir = "/etc/dcc/$DCCM_USERDIRS/$user_dir";
    }
    html_die("no user directory $user_dir")
	if (! -d $user_dir) ;
    $logdir = "$user_dir/log";
    $whiteclnt = "$user_dir/whiteclnt";

    # Figure out which substitute headers are turned on
    #	This does not detect all possible SMTP "field names," but it also
    #	won't get Perl confused with field names such as 'foo[bar]'.
    #	Each pass of the loop removes one -S option from $sub_args and
    #	appends its header name to $sub_hdrs.
    $sub_hdrs = "";
    $sub_hdrs .= "|$1"
	while ($sub_args && $sub_args =~ s/(?:-[VdbxANQW]*S\s*)
	       ((?i:[-a-z_0-9]+))
	       ($|\s+)
	       /$2/x);
    $sub_white = $sub_hdrs;
    # pattern matching optional or substitute SMTP headers
    $sub_hdrs =~ s/^\|+//;
    # pattern matching optional or substitute checksum types
    $sub_white =~ s/\|/)|(substitute\\s+/g;
    $sub_white =~ s/^[|)(]*/(/;
    $sub_white .= ')';

    # names of checksums whose thresholds can be set
    $thold_cks_cmn = 'Body,Fuz1,Fuz2';
    $thold_cks = $thold_cks_cmn;
    # all checksums including those not kept by (almost all) DCC servers
    #$thold_cks_all = 'IP,env_From,From,env_To,Message-ID,' . $thold_cks;

    # compute default checksum thresholds
    if ($thold) {
	$cks = $thold_cks_cmn if (!$cks);
	foreach my $ck (split(/,/,$cks)) {
	    my ($t,$v) = ($ck, $thold);
	    $conf_cks_tholds{$t} = "<B>$v</B> <SMALL>by default in /etc/dcc/dcc_conf</SMALL>"
		if (parse_thold_value($t, $v));
	}
    }

    $cgibin = $ENV{SCRIPT_NAME};
    # trim the name of our script from the path
    $cgibin =~ s!/+[^/]+$!!;
    # trim extra leading /s that can mess up our generated links
    $cgibin =~ s!^/{2,}!/!;

    get_query();

    return 1;
}



# Get user's parameters
#   Decode the GET query string or the POSTed form into the global %query,
#   set the URL and link globals used by the page generators, and handle
#   the logoutID parameter of the "LogOut/In" link.
#   This does not return when it handles a logout; it either redirects
#   with punt2() or answers "401 Unauthorized" and exits.
sub get_query {
    my($buffer, $name, $value);

    if (defined($ENV{REQUEST_METHOD}) && $ENV{REQUEST_METHOD} eq "GET") {
	$buffer = $ENV{'QUERY_STRING'};
    } elsif (!$ENV{CONTENT_LENGTH}) {
	$buffer = '';
    } else {
	read(STDIN, $buffer, $ENV{CONTENT_LENGTH});
    }
    $buffer = '' if (!defined($buffer));
    $buffer =~ tr/+/ /;
    foreach my $pair (split(/&/, $buffer)) {
	# split only at the first '=' to not truncate a value that
	#   contains an unencoded '='
	($name, $value) = split(/=/, $pair, 2);
	next if (!defined($name) || $name eq '');
	$name =~ s/%([a-fA-F0-9]{2})/pack("C", hex($1))/eg;
	# test the length instead of the truth of the value
	#   so that "0" is not turned into ""
	if (defined($value) && $value ne '') {
	    $value =~ s/%([a-fA-F0-9]{2})/pack("C", hex($1))/eg;
	} else {
	    $value = "";
	}
	$query{$name} = $value;
    }

    $url_ques = '?';
    $url_suffix = '';
    $form_hidden = '';
    if ($query{debug}) {
	print STDERR "\n";
	debug_time("start $ENV{SCRIPT_NAME}");
	print STDERR "            ";
	print STDERR " $_=\"$query{$_}\"" foreach (keys %query);
	print STDERR " ENV{AuthName}=\"$ENV{AuthName}\"" if ($ENV{AuthName});
	print STDERR "\n";
	# keep debugging turned on in the generated links and forms
	$url_suffix = '?debug=1';
	$url_ques = '&';
	$form_hidden = "<INPUT type=hidden name=debug value=on>";
    }

    $list_log_url = "$cgibin/list-log$url_suffix";
    $list_log_link = "<A HREF=\"$list_log_url";
    $list_msg_link = "<A HREF=\"$cgibin/list-msg$url_suffix";
    $edit_url = "$cgibin/edit-whiteclnt$url_suffix";
    $edit_link = "<A HREF=\"$edit_url";
    $passwd_url = "$cgibin/chgpasswd$url_suffix";
    $passwd_link = "<A HREF=\"$passwd_url${url_ques}goback=$ENV{SCRIPT_NAME}";

    $logoutID = $ENV{UNIQUE_ID};
    # do the best we can if Apache mod_unique_id is not present
    $logoutID = "$ENV{REMOTE_ADDR}-$ENV{REMOTE_PORT}-$$-" . time()
	if (! $logoutID);
    $logoutID = url_encode($logoutID);

    # kludge to handle "logout" button including recognizing that we have
    #   already handled it.  The usual tactic of requiring the user to
    #   specify a new username and then using a cookie seems ugly.
    $tfile = $query{logoutID};
    if ($tfile && $tfile =~ /^([a-z0-9]+)$/i) {
	$tfile = "$logout_tmpdir/logout.$1";

	# Delete any old logout marker files.
	#   Do it in Perl instead of with `find` and `rm`, because the
	#   output of `find` is tainted and so cannot be given to `rm`
	#   while running with -T, and because a list of names separated
	#   by newlines would be run by the shell as separate commands.
	#   An age of at least 2 days matches `find -mtime +1`.
	if (opendir(LOGOUT_TMPDIR, $logout_tmpdir)) {
	    foreach my $old (readdir(LOGOUT_TMPDIR)) {
		# untaint the name and consider only our marker files
		next if ($old !~ /^(logout\.[a-z0-9]+)$/i);
		my $old_tfile = "$logout_tmpdir/$1";
		next if (! -f $old_tfile || int(-M _) < 2);
		unlink($old_tfile)
		    || print STDERR "unlink($old_tfile): $!\n";
	    }
	    closedir(LOGOUT_TMPDIR);
	}

	# Look for our logout marker file.
	if (-f $tfile) {
	    # If it exists, then we have been here before, so just delete it.
	    # and refresh
	    unlink $tfile;
	    punt2("", "$ENV{SCRIPT_NAME}$url_suffix");

	} else {
	    # If it does not exist, create it & force a cycle of authentication.
	    if (!open(TFILE, "> $tfile")) {
		print STDERR "open($tfile): $!\n";
		html_whine("open($tfile): $!", $edit_url);
	    }
	    while (($name,$value) = each %ENV) {
		print TFILE "$name=$value\n";
	    }

	    # Demand a new user name and password
	    my($AuthName) = $ENV{AuthName} ? $ENV{AuthName} : "DCC user";
	    print <<EOF;
WWW-authenticate: Basic realm="$AuthName"
Status: 401 Unauthorized
EOF
	    html_head("Access Failure");
	    print "<P class=warn>\n";
	    print $msg ? $msg : "Access Failure";
	    print "\n</BODY></HTML>\n";
	    exit;
	}
    }
}




##########################################################################

# %-encode text for a URL
#	$_[0] is the text
#   Returns the text with every character other than letters, digits, and
#   -_.!*(), replaced by %XX.
#   '+' must be encoded, because get_query() turns every '+' in a query
#   string into a space before undoing the %-encoding.
sub url_encode {
    my($out) = @_;

    $out =~ s/([^-_.!*(),0-9a-zA-Z])/sprintf("%%%02X",ord($1))/eg;
    return $out;
}



# encode text for ordinary HTML to avoid special HTML flags such as '<'
#   retain newlines
#	$_[0] is the text
#   Returns the text with &, <, and > replaced by named entities and with
#   quotes, control characters other than tab and newline, and characters
#   from \177 through \377 replaced by numeric entities.
sub html_str_encode {
    my($out) = @_;

    # '&' must be first to not encode the entities added below
    $out =~ s/&/&amp;/g;
    $out =~ s/</&lt;/g;
    $out =~ s/>/&gt;/g;
    # The range of control characters runs through \37 so that ESC and
    #	the other characters from \20 through \37 are not passed through.
    $out =~ s/([\00-\10\13-\37\42\47\177-\377])/sprintf("&#%d;",ord($1))/eg;
    return $out;
}



# encode text for HTML with html_str_encode(), and then put <BR> in front
#   of every newline
#	$_[0] is the text
sub html_text_encode {
    my $html = html_str_encode(@_);

    # the zero width match inserts the tag without touching the newline
    $html =~ s/(?=\n)/<BR>/g;
    return $html;
}



# encode text for HTML, trimmed to at most 32 characters with the end replaced
#   by an ellipsis if too long
#	$_[0] is the header value
#   Returns "&nbsp;" for a missing or empty value.
sub hdr_trim_encode {
    my($out) = @_;

    # test for missing or empty instead of false
    #	so that a value of "0" is displayed
    return "&nbsp;" if (!defined($out) || $out eq '');

    return html_str_encode($out) if (length($out) <= 32);

    # Prefer to cut after 21 to 29 characters at white space or at one
    #	of <>.@ and otherwise simply keep the first 28 characters.
    $out = substr($out, 0, 28)
	if ($out !~ s/(^.{20,28}[^<>.@\t ])[<>.@\t ].*/$1/);
    $out = html_str_encode($out);
    $out .=  "&nbsp;...";
    return $out;
}



##########################################################################
# Open and parse a log message
# sets these globals
#	$msg_date		# envelope
#	$msg_helo		# envelope
#	$msg_ip			# envelope
#	$msg_client_name	# envelope
#	$msg_env_from		# envelope
#	@msg_env_to		# envelope
#	$msg_mail_host		# envelope
#	$msg_from		# header
#	$msg_subject
#	$msg_hdrs
#	$msg_body
#	$msg_cksums
#	$msg_result

# Args:
#	$msg		name of the log message used in error text and links
#	$path		path of the log file
#	$no_body	if true, do not build the header links, the body,
#			    or the HTML encoded checksums
# Returns undef on success or a list of a short and a long complaint.
sub parse_log_msg {
    my($msg, $path, $no_body) = @_;
    my($line, $cur_hdr, $hdr_type, $misc_hdr, $seen_message_id,
       $ise_msg, $cksum_marker, $cksum_marker_p);

    undef $msg_date;
    undef $msg_helo;
    undef $msg_ip;
    undef $msg_client_name;
    undef $msg_env_from;
    undef @msg_env_to;
    undef $msg_mail_host;
    undef $msg_from;
    undef $msg_subject;
    $msg_hdrs = '';
    $msg_body = '';
    $msg_cksums = '';
    $msg_result = '';

    $ise_msg = "Internal Server Error";
    $cksum_marker = "### end of message body ########################\n";
    $cksum_marker_p = qr/^### end of message body ########################\s*$/;

    sysopen(MSG, $path, O_RDONLY, 0)
	|| return ($ise_msg, "open($path): $!");

    # close the file on this failure as on all of the others
    if (!($msg_date = <MSG>)) {
	close(MSG);
	return ($ise_msg, "empty msg.$msg");
    }

    if ($msg_date !~ /^VERSION/) {
	close(MSG);
	return ($ise_msg, "format of msg.$msg unrecognized");
    }
    if (!($msg_date = <MSG>)) {
	close(MSG);
	return ($ise_msg, "message $msg truncated after VERSION line");
    }
    if (!($msg_date =~ s/^DATE: +(.*) +[^ ]+/$1/)) {
	close(MSG);
	return ($ise_msg, "unrecognized DATE line $msg_date in message $msg");
    }

    # The IP, HELO, and env_From lines are each optional.  A line that
    #	is not what was expected is handed on as the next kind of line.
    if (!($msg_ip = <MSG>)) {
	close(MSG);
	return ($ise_msg, "message $msg truncated in envelope");
    }
    if ($msg_ip =~ /^IP: ([^ ]*) +([:.0-9a-f]{14,47})$/i) {
	$msg_ip = $2;
	$msg_client_name = $1;
	$msg_ip =~ s/^::ffff://i;
	$msg_client_name =~ s/^\[.*]$//;
	$msg_client_name = ' ' if ($msg_client_name eq '');
	if (!($msg_helo = <MSG>)) {
	    close(MSG);
	    return ($ise_msg, "message $msg truncated in envelope");
	}
	chop($msg_helo);
    } else {
	# no IP line
	$msg_helo = $msg_ip;
	undef $msg_ip;
    }
    if (!($msg_helo =~ s/^HELO: //)) {
	# no HELO line
	$msg_env_from = $msg_helo;
	undef($msg_helo);
    } else {
	if (!($msg_env_from = <MSG>)) {
	    close(MSG);
	    return ($ise_msg, "message $msg truncated after HELO line");
	}
	chop($msg_env_from);
    }
    if (!($msg_env_from =~ s/^env_From: //)) {
	# no env_from line
	$line = $msg_env_from;
	undef($msg_env_from);
    } else {
	$msg_mail_host = $msg_env_from;
	$msg_mail_host =~ s/.*mail_host=(.*)/$1/;
	$msg_env_from =~ s/<?([^\t> ]*).*/$1/;
	$line = <MSG>;
    }

    # Save the envelope env_To lines.
    #	The envelope ends with an empty line.
    for (;;) {
	if (! $line) {
	    close(MSG);
	    return ($ise_msg, "message $msg truncated in envelope");
	}
	last if ($line =~ /^[\r\n]*$/);
	if ($line eq "abort\n") {
	    close(MSG);
	    return ("aborted transaction", "");
	}
	push(@msg_env_to, $1) if ($line =~ /env_To:[\t ]*<?([^\t> ]+).*/);
	$line = <MSG>;
    }


    # Look for header lines that get checksums as we collect the whole message.
    #	$new_hdr is the raw text of the current header including
    #	continuation lines.  $cur_hdr is a reference to the variable
    #	collecting the value of an interesting header.
    $new_hdr = "";
    undef($hdr_type);
    for (;;) {
	if (!($line = <MSG>)) {
	    close(MSG);
	    return ($ise_msg, "message $msg truncated in headers");
	}

	# deal with header continuation
	if ($line =~ /^[\t ]+/) {
	    $new_hdr .= $line;
	    $$cur_hdr .= $line if ($cur_hdr);
	    next;
	}

	if ($cur_hdr) {
	    # end a preceding interesting header
	    $$cur_hdr =~ s/[\t ]*\n[\r\s]*/ /g;
	    $$cur_hdr =~ s/^\s+//;
	    $$cur_hdr =~ s/\s+$//;
	    # emit a link
	    if (!$no_body) {
		if ($hdr_type) {
		    $msg_hdrs .= "$edit_link${url_ques}type=$hdr_type&amp;val=";
		    $msg_hdrs .= url_encode($$cur_hdr);
		    $msg_hdrs .= "&amp;msg=$msg&amp;auto=1\">";
		    chop($new_hdr);
		    $msg_hdrs .= html_str_encode($new_hdr);
		    $msg_hdrs .= "</A>\n";
		    undef($hdr_type);
		} else {
		    $msg_hdrs .= html_str_encode($new_hdr);
		}
	    }
	    undef $cur_hdr;
	} else {
	    # end preceding boring header
	    $msg_hdrs .= html_str_encode($new_hdr);
	}


	# stop after the headers
	last if ($line eq "\n");

	$new_hdr = $line;

	# Start an interesting header
	if ($line =~ s/^from:\s*//i) {
	    $hdr_type = "from";
	    $msg_from = $line;
	    $cur_hdr = \$msg_from;
	    next;
	}
	if ($line =~ s/^(message-id):\s*//i) {
	    $hdr_type = $1;
	    $hdr_type =~ tr/-A-Z/_a-z/;
	    $misc_hdr = $line;
	    $cur_hdr = \$misc_hdr;
	    $seen_message_id = 1
		if ($hdr_type eq "message_id");
	    next;
	}
	if ($line =~ s/^subject:\s*//i) {
	    # the subject gets a link only if it is a substitute header
	    $hdr_type = url_encode("substitute subject")
		if ('subject:' =~ /^($sub_hdrs):/i);
	    $msg_subject = $line;
	    $cur_hdr = \$msg_subject;
	    next;
	}

	if (!$no_body && $line =~ s/^($sub_hdrs):\s*//i) {
	    $hdr_type = $1;
	    $hdr_type =~ tr/A-Z/a-z/;
	    $hdr_type = url_encode("substitute $hdr_type");
	    $misc_hdr = $line;
	    $cur_hdr = \$misc_hdr;
	    next;
	}
    }

    # fake empty Message-ID if required
    if (! $seen_message_id) {
	$msg_hdrs .= "$edit_link${url_ques}type=";
	$msg_hdrs .= url_encode("message_id");
	$msg_hdrs .= "&amp;val=%3c%3e&amp;msg=$msg&amp;auto=1\">missing Message-ID</A>\n";
    }

    # copy the body of the message
    for (;;) {
	if (!($line = <MSG>)) {
	    close(MSG);
	    return ($ise_msg, "message $msg truncated in body");
	}
	last if ($line =~ $cksum_marker_p);
	$line =~ s/[ \t\r]+$//mg;
	$msg_body .= html_text_encode($line) if (!$no_body);
    }


    # copy the checksums
    while ($line = <MSG>) {
	# notice quoted checksums that are part of the body
	#   Another marker means that what has been collected so far
	#   was really part of the body, so move it there and start over.
	if ($line =~ $cksum_marker_p) {
	    if (!$no_body) {
		$msg_body .= "<PRE class=mono>\n";
		$msg_body .= $cksum_marker;
		$msg_body .= $msg_cksums;
		$msg_body .= "</PRE>\n";
	    }
	    $msg_cksums = '';
	    $msg_result = '';
	    next;
	}
	$msg_result .= "MTA " if ($line =~ /\bMTA-->spam(|\(first\))\b/);
	$msg_result .= "MTA-OK " if ($line =~ /\bMTA-->OK(|\(first\))\b/);
	$msg_result .= "BL " if ($line =~ /\bwlist-->spam\s/);
	$msg_result .= "WL " if ($line =~ /\bwlist-->OK\s/);
	$msg_result .= "DCC " if ($line =~ /\bDCC-->spam\s/);
	$msg_result .= "<I>DCC</I> " if ($line =~ /\bDCC-->spam\(off\)\s/);
	$msg_result .= "OK-DCC " if ($line =~ /\bDCC-->OK\s/);
	$msg_result .= "<I>OK-DCC</I> " if ($line =~ /\bDCC-->OK\(off\)\s/);
	$msg_result .= "Rep " if ($line =~ /\bRep-->spam\s/);
	$msg_result .= "<I>Rep</I> " if ($line =~ /\bRep-->spam\(off\)\s/);
	$msg_result .= "DNSBL " if ($line =~ /\bDNSBL-->spam\s/);
	$msg_result .= "<I>DNSBL</I> " if ($line =~ /\bDNSBL-->spam\(off\)\s/);
	$msg_result .= "Xfltr " if ($line =~ /\bXFLTR-->spam\s/);
	$msg_result .= "<I>Xfltr</I> " if ($line =~ /\bXFLTR-->spam\(off\)\s/);
	# the overall result goes in front of the reasons
	if ($line =~ /^result: temporary greylist embargo/) {
	    $msg_result = "Grey: " . $msg_result;
	} elsif ($line =~ /^result: accept after greylist embargo/) {
	    $msg_result = "OK-Grey: " . $msg_result;
	} elsif ($line =~ /^result: accept/) {
	    $msg_result = "OK: " . $msg_result;
	} elsif ($line =~ /^result: reject/) {
	    $msg_result = "Reject: " . $msg_result;
	} elsif ($line =~ /^result: discard/) {
	    $msg_result = "Discard: " . $msg_result;
	}
	$msg_cksums .= $line;
    }
    $msg_cksums = html_str_encode($msg_cksums) if (!$no_body);


    close(MSG);
    return undef;
}



# State shared by get_log_msgs(), cache_flush(), and cache_write_line().
#   In %msgs_cache, the key is the name of a log file after "msg." and
#   the value is a reference to a list of the date and i-number of the file.
#   In %msgs_cache_state, the key is the number of a msg.cache.* file.
#   The other variables are set by get_log_msgs().
local(%msgs_cache, $cache_line_len, $cache_pack,
      $cache_version, %msgs_cache_state,
      %msgs_date, %msgs_result, %msgs_from, %msgs_subject,
      $msg_day_first, $msg_day_last, $msg_first, $msg_last,
      $msg_newer, $msg_part_num, @msgs_num);


# write one cache file in the current directory
#	$_[0] is the string of packed cache records
#	$_[1] is the number in the name of the cache file, which is also
#	    the date of the records in days since the epoch
#   The records are written after $cache_version to a temporary file
#   that is then renamed to msg.cache.$cnum.
#   Returns 1 and marks the cache file good in %msgs_cache_state
#   on success, or undef after complaining to STDERR.
sub cache_write_line {
    my($buf, $cnum) = @_;
    my($tmp, $cfname, $date);

    # suppress Perl taint warning
    #	and do not use a stale $1 if there is no number
    if (!defined($cnum) || $cnum !~ /(\d+)/) {
	print STDERR "cache_write_line: bad cache file number\n";
	return undef;
    }
    $cnum = $1;

    $tmp = "msg.cache." . "new." . $$;
    # O_TRUNC keeps the end of a longer file left by an old process with
    #	our PID from surviving after the new contents.
    if (!sysopen(CFILE, $tmp, O_WRONLY | O_CREAT | O_TRUNC, 0660)){
	print STDERR "open($tmp): $!\n";
	return undef;
    }
    foreach my $chunk ($cache_version, $buf) {
	my $len = syswrite(CFILE, $chunk);
	# syswrite() returns undef on an error
	if (!defined($len) || $len != length($chunk)) {
	    print STDERR "syswrite $tmp: $!\n";
	    close(CFILE);
	    unlink($tmp);
	    return undef;
	}
    }

    close(CFILE);
    # date the cache file with the day of its contents
    $date = $cnum * (24*3600);
    if ($date <= time) {
	utime($date, $date, $tmp)
	    || print STDERR "utime($date, $date, $tmp): $!\n";
    }
    $cfname = "msg.cache." . $cnum;
    if (!rename($tmp, $cfname)) {
	print STDERR "rename($tmp, $cfname): $!\n";
	unlink($tmp);
	return undef;
    }

    $msgs_cache_state{$cnum} = 1;
    return 1;
}



# Rewrite the msg.cache.* files in the current directory from
#   %msgs_cache and @msgs_num, and delete the cache files that
#   %msgs_cache_state does not say are good.
#   Records are collected by day, and a cache file is written for a day
#   only when it has more than 10 records.  Days that already have a good
#   cache file are skipped.
#   If the directory is not writable, the only action is a complaint
#   at most once every 4 hours.
sub cache_flush {
    my($files_written, $recs_seen, $recs_done, $day, $msg_day,
       $records, $state, $junk);

    unless (-w ".") {
	my $marker = "$logout_tmpdir/msg.$user-nocache";

	# say nothing if we have complained recently
	return if (-f $marker
		   && (stat(_))[9] >= time()-(4*3600));

	if (open(CFILE,">>",$marker)) {
	    print CFILE "$logdir not writable for cache files\n";
	    close CFILE;
	} else {
	    print STDERR "open($marker): $!\n";
	}
	print STDERR "$logdir not writable for cache files\n";
	return;
    }

    $files_written = 0;
    $recs_seen = 0;
    $recs_done = 0;

    $day = 0;
    foreach my $name (@msgs_num) {
	$msg_day = $msgs_cache{$name}[0] / (24*3600);
	# this day already has a good cache file
	next if ($msgs_cache_state{$msg_day});

	if ($day != $msg_day) {
	    # finish the preceding day
	    if ($recs_seen - $recs_done <= 10) {
		# too few records to be worth a file
		$recs_seen = $recs_done;
	    } else {
		++$files_written;
		return if (!cache_write_line($records, $day));
		$recs_done = $recs_seen;
	    }
	    undef $records;
	    $day = $msg_day;
	}

	++$recs_seen;
	$records .= pack($cache_pack,
			 $msgs_cache{$name}[0],
			 $msgs_cache{$name}[1],
			 $name,
			 $msgs_cache{$name}[2] ? $msgs_cache{$name}[2] : 0);
    }
    # finish the last day
    if ($recs_seen - $recs_done <= 10) {
	$recs_seen = $recs_done;
    } else {
	++$files_written;
	cache_write_line($records, $day);
    }

    # delete junk cache files
    while (($day, $state) = each %msgs_cache_state) {
	if (!$state) {
	    $junk = "msg.cache." . $day;
	    if (-f $junk && !unlink($junk)) {
		print STDERR "unlink($junk): $!\n";
		return;
	    }
	}
    }

    debug_time("flushed $files_written cache files with $recs_seen files");
}



# get the list of messages
#   The first arg is the current file
#   Try to limit the size of the table to the second arg
#	divide days worth of files to fit the page size if it is <0
#   The third arg is false for old callers, and otherwise asks for
#	days to be divided into parts of the second arg files
#
#   sets globals %msgs_date, %msgs_result, %msgs_from, %msgs_subject,
#	$msg_day_first, $msg_day_last, $msg_first, $msg_last,
#	$msg_newer, $msg_part_num, @msgs_num
#   The $msg_* values are indices into @msgs_num.
#   The current directory is changed to $logdir.
sub get_log_msgs {
    my($page_msg,			# target log message
       $page_size,			# log files / web page
       $mode				# 0=old, 1=reverse sort & divide days
       ) = @_;
    my($cache_len, $need_flush, $cache_parse_limit, $dir_len,
       $line, $msg, $ino, $cnum, $entry, $days, $sort_order,
       $msg_tgt, $date_tgt, $date, $date1, $msg_num, $msg_num_prev, $start);

    $cache_parse_limit = 100;

    # A cache file is this version line followed by fixed length records
    #	of the date, i-number, and 6 character name of a log file.
    $cache_version = "DCC msg.cache version 2\n";
    $cache_pack = "LLA6";
    $cache_line_len = length(pack($cache_pack, 0));


    # reverse the sort for old callers
    $sort_order = !$mode ? -1 : 1;


    # Build a list of log file names and dates
    #	Use cache files of previously parsed dates.  Validate the
    #	files by checking i-numbers.  Use the `ls` command because
    #	the Perl readdir() function does not provide d_ino/d_fileno.
    chdir($logdir) || html_whine("chdir($logdir): $!");

    open(DIR, "/bin/ls -ifC1 |") or html_whine("ls -ifC1 $logdir: $!");
    $dir_len = 0;
    while ($line = <DIR>) {
	# remember the i-number of each log file
	if ($line =~ /^\s*(\d+)\s+msg\.([A-Za-z0-9]{6})\s*$/) {
	    $msgs_cache{$2}[1] = $1;
	    ++$dir_len;
	    next;
	}
	# notice cache files
	#   but only those with a whole number of records.
	#   The state of a cache file is undefined until it has been read.
	$msgs_cache_state{$1} = ()
	    if ($line =~ /^\s*\d+\s+msg\.cache\.(\d{5})\s*$/
		&& -f "msg.cache.$1"
		&& (((stat(_))[7] - length($cache_version))
		    % $cache_line_len) == 0);
    }
    close(DIR);
    debug_time("$dir_len files found");

    # load the cache files
    $cache_len = 0;
    $need_flush = 0;
    foreach $cnum (sort keys(%msgs_cache_state)) {
	my($total, $good, $date_lo, $date_hi);

	if (!open(CFILE, "msg.cache." . $cnum)) {
	    $msgs_cache_state{$cnum} = 0;
	    next;
	}

	# ignore a cache file with the wrong version
	if (!read(CFILE, $buf, length($cache_version))
	    || $buf ne $cache_version) {
	    close CFILE;
	    next;
	}

	# the number in the name of a cache file is the day of its records
	$date_lo = $cnum * 24*3600;
	$date_hi = $date_lo + 24*3600 - 1;
	$good = $total = 0;
	while (read(CFILE, $buf, $cache_line_len)) {
	    ($date, $ino, $msg) = unpack($cache_pack, $buf);
	    ++$total;

	    # cache file is bogus if it contains bad dates
	    last if ($date < $date_lo || $date > $date_hi);

	    # skip deleted log files
	    next if (!exists($msgs_cache{$msg}));

	    # skip log file names that have been recycled
	    next if ($msgs_cache{$msg}[1] != $ino);

	    $msgs_cache{$msg}[0] = $date;
	    push @msgs_num, $msg;

	    ++$good;
	}
	close(CFILE);
	# rewrite the cache files if this one is mostly or entirely useless,
	#   and mark it good only if all of its records are good
	if ($good == 0 || $good+20 < $total) {
	    $need_flush = 1;
	} elsif ($good == $total) {
	    $msgs_cache_state{$cnum} = 1;
	}
	$cache_len += $good;
    }
    debug_time("$cache_len files cached");

    # If there are any new log files,
    #	then we must get their dates and then sort all of the names
    if ($cache_len != $dir_len) {
	$need_flush = 1 if ($dir_len > $cache_len+100);
	$msg_num = 0;
	while (($msg, $entry) = each %msgs_cache) {
	    # skip files whose dates came from a cache file
	    next if (@$entry[0]);
	    $date = (stat "msg.$msg")[9];
	    if (!$date) {
		delete $msgs_cache{$msg};
		next;
	    }
	    @$entry[0] = $date;
	    # the cache file for the day of this file is out of date
	    $msgs_cache_state{$date / (24*3600)} = 0;
	    ++$msg_num;
	}
	debug_time("$msg_num files dated");

	# Sort the names by date by sorting strings of the packed
	#   date followed by the name, and then removing the dates.
	@msgs_num = map substr($_,4),
			(sort map pack("NA6",
				       $msgs_cache{$_}[0]*$sort_order,
				       $_),
				    keys %msgs_cache);
	debug_time("sorted " . ($#msgs_num+1) . " files");
    }

    # find the target message that must be listed
    #	The default is the last file for new callers and the first for old.
    $msg_tgt = ($sort_order > 0 && $#msgs_num >= 0) ? $#msgs_num : 0;
    if ($page_msg) {
	for ($msg_num = 0; $msg_num <= $#msgs_num; ++$msg_num) {
	    if ($msgs_num[$msg_num] eq $page_msg) {
		$msg_tgt = $msg_num;
		last;
	    }
	}
	debug_time("found #" . $msg_tgt);
    }

    # we are finished if the caller only wanted the list of files
    #   perhaps for URLs pointing to previous and next files
    if (!$page_size || $page_size < 1 || $#msgs_num < 0) {
	cache_flush() if ($need_flush);
	$msg_first = $msg_tgt;
	$msg_last = $#msgs_num;
	$msg_newer = $#msgs_num;
	$msg_part_num = 0;
	# NOTE(review): %msgs_mtime is not used elsewhere in the visible
	#   part of this file; presumably it is for an old caller
	$msgs_mtime{$page_msg} = 1 if (!$mode && $page_msg);
	return;
    }

    # Get summary information from all of the files on the target day, the
    #	last file on the previous day, and on the first file on the next day.
    #
    # walk backward from the target to the first log file of the target day
    #	Days are compared by their local day of the year.
    $date_tgt = $date = (localtime $msgs_cache{$msgs_num[$msg_tgt]}[0])[7];
    for ($msg_day_first = $msg_tgt;
	 $msg_day_first > 0;
	 $msg_day_first = $msg_num) {
	$msg_num = $msg_day_first-1;
	$date = (localtime $msgs_cache{$msgs_num[$msg_num]}[0])[7];
	last if ($date != $date_tgt);
    }
    if (!$mode) {
	$msg_part_num = 0;
	$msg_first = $msg_day_first;
    } else {
	# start at the beginning of the part of the day with the target
	$msg_part_num = ($msg_tgt - $msg_day_first) / $page_size;
	$msg_first = $msg_day_first + ($msg_part_num * $page_size);
    }

    # walk forward to the end of the day or $page_size files
    $days = 0;				# count space used by date headings
    $msg_last = $msg_first + $page_size-1;
    $msg_last = $#msgs_num if ($msg_last > $#msgs_num);
    $msg_newer = $msg_first + $page_size;
    $msg_newer = $#msgs_num if ($msg_newer > $#msgs_num);
    $msg_day_last = $#msgs_num;
    $date1 = $date_tgt;
    for ($msg_num = $msg_tgt+1; $msg_num <= $#msgs_num; ++$msg_num) {
	$date = (localtime $msgs_cache{$msgs_num[$msg_num]}[0])[7];
	next if ($date == $date1);

	# this is the first file of another day
	++$days;

	if ($date1 == $date_tgt) {
	    $msg_day_last = $msg_num-1;

	    # the "newer" link goes to the first file of the next day if
	    #	the current day fits on the web page
	    $msg_newer = $msg_num if (!$msg_newer || $msg_num < $msg_newer);
	}

	# stop when the page is full
	if ($msg_num > $msg_first + $page_size - $days) {
	    $msg_last = $msg_num-1 if (!$mode);
	    last;
	}

	$msg_last = $msg_num-1 if ($#msgs_num > $msg_first+$page_size-$days);
	$date1 = $date;
    }

    if ($mode) {
	# number the parts of a divided day starting with 1
	++$msg_part_num if ($msg_part_num != 0
			    || $msg_first + $page_size <= $msg_day_last);
	# overlap the parts of a day by a line
	++$msg_last if ($msg_part_num != 0 && $msg_last < $msg_day_last);
    }

    # parse the log files to get the data
    for ($msg_num = $msg_first; $msg_num <= $msg_last; ++$msg_num) {
	$msg = $msgs_num[$msg_num];
	my(@error) = parse_log_msg($msg, "$logdir/msg.$msg", "no body");
	if (defined $error[0]) {
	    # show the complaints instead of the sender and subject
	    $msgs_date{$msg} = strftime("%x %X",
					localtime($msgs_cache{$msg}[0]))
		if (!$msgs_date{$msg} && $msgs_cache{$msg}[0]);
	    $msgs_from{$msg} = "<B class=warn>$error[0]</B>";
	    $msgs_result{$msg} = '';
	    $msgs_subject{$msg} = "<B class=warn>$error[1]</B>";
	} else {
	    $msgs_date{$msg} = $msg_date;
	    $msgs_from{$msg} = hdr_trim_encode($msg_from
					       ? $msg_from
					       : $msg_env_from);
	    $msgs_result{$msg} = $msg_result ? $msg_result : "&nbsp;";
	    $msgs_subject{$msg} = hdr_trim_encode($msg_subject);
	}
    }
    debug_time(($msg_last - $msg_first + 1) . " log files parsed");

    cache_flush() if ($need_flush);
}



##########################################################################
# whiteclnt file functions

# The file is represented as a list of references to 3-tuples.
#   The first of the three is the whitelist entry in a canonical form
#	as a key uniquely identifying the entry.
#   The second is a comment string of zero or more comment lines.
#   The third is the DCC whiteclnt entry.
#
#   The canonical form and the whiteclnt line of the first 3-tuple for a file
#   are null, because it is a preamble for the file.  It contains some of the
#   dates when the file has been changed as well as flags for the webuser
#   machinery.
#   The last triple in a file may also lack a whitelist entry.

# There is a hash or dictionary of references to entries in the list


# Open (creating it if necessary), lock, read, and parse the whiteclnt file.
#	$file_ref	reference to the list that receives the file image
#	$dict_ref	reference to the hash that receives the keyed entries
#   Both are emptied before the file is read.
#   The file stays open on the global WHITECLNT handle after the call.
#   Failure to open or to lock the file is reported with html_whine().
sub read_whiteclnt {
    my($file_ref, $dict_ref) = @_;
    my($text, $prior, $pending);

    @$file_ref = ();
    %$dict_ref = ();

    # Creating the file here is usually a waste of effort, because
    # it must be writable by both the HTTP server and dccm or dccifd.
    # They are probably not in any common group.
    # Let the /var/dcc/libexec/newwebuser script create the per-user
    #	directories and files.
    # Because the contents of whitelists might be a little sensitive
    #	they should not be readable by "other"
    if (!sysopen(WHITECLNT, $whiteclnt, O_RDWR | O_CREAT, 0660)) {
	html_whine("open($whiteclnt): $!");
    }
    chmod(0660, $whiteclnt);

    # do not wait for the lock
    if (!flock(WHITECLNT, LOCK_EX | LOCK_NB)) {
	html_whine("flock($whiteclnt): $!");
    }

    $pending = "";
    while ($text = <WHITECLNT>) {
	# supply the newline missing from an unterminated last line
	$text .= "\n" unless (substr($text,-1) eq "\n");
	$text =~ s/[ \t]+$//;

	# comment and blank lines accumulate until the next entry
	if ($text =~ /(^[ \t]*#)|(^$)/) {
	    $pending .= $text;
	    next;
	}

	# A line that starts with white space has no count of its own.
	#	Borrow the first word of the previous entry's line,
	#	because that is what dcclib/parse_whitefile.c does.
	if ($text =~ /^[ \t]+/ && $#$file_ref > 0) {
	    $prior = $file_ref->[$#$file_ref][2];
	    $text = "$1$text" if ($prior && $prior =~ /^(\S+)/);
	}

	add_white_entry($file_ref, $dict_ref, $pending, $text);
	$pending = "";
    }

    # whatever comments are left trail the final entry
    add_white_entry($file_ref, $dict_ref, $pending, "");
}



# Read the main whiteclnt file to determine the default option settings.
#	$def_ref	reference to a hash that is emptied and then filled
#			with an HTML description of each option's default
#   The built-in defaults are installed first.  They are then overridden by
#   the "option" lines of $main_whiteclnt, unless that file is the very file
#   already open on WHITECLNT (same device and inode).
#   Trouble opening or examining the files is only reported on STDERR and
#   leaves the built-in defaults in place.
sub read_whitedefs {
    my($def_ref) = @_;
    my(@sb1, @sb2, $line, @parsed, $bydef);


    # these defaults for the defaults must match dcclib/parse_whitefile.c
    #	or elsewhere in the DCC client source (e.g. for discardok)
    %$def_ref = ();
    $bydef = " <SMALL>by default</SMALL>";
    ${$def_ref}{dccenable} = "<B>on</B>$bydef";
    ${$def_ref}{greyfilter} = "<B>on</B>$bydef";
    ${$def_ref}{greylog} = "<B>on</B>$bydef";
    ${$def_ref}{mtafirst} = "<B>last</B>$bydef";
    ${$def_ref}{reps} = "<B>off</B>$bydef";
    ${$def_ref}{dnsbl} = "<B>off</B>$bydef";
    ${$def_ref}{xfltr} = "<B>off</B>$bydef";
    ${$def_ref}{logall} = "<B>off</B>$bydef";
    ${$def_ref}{discardok} = "<B>delay mail</B>$bydef";

    # one threshold default per checksum type named in $thold_cks
    foreach my $ck (split(/,/,$thold_cks)) {
	my $nm = "thold-$ck";
	if (!$conf_cks_tholds{$ck}) {
	    ${$def_ref}{$nm} = "<B>Never</B>$bydef";
	} else {
	    ${$def_ref}{$nm} = $conf_cks_tholds{$ck};
	}
    }

    if (!sysopen(MAINWHITE, $main_whiteclnt, O_RDONLY, 0)) {
	print STDERR "open($main_whiteclnt): $!\n";
	return;
    }

    if (!(@sb1 = stat(MAINWHITE))) {
	print STDERR "stat($main_whiteclnt): $!\n";
    } elsif (!(@sb2 = stat(WHITECLNT))) {
	# name the per-user file; "${$whiteclnt}" would be a symbolic
	#   dereference that prints nothing useful
	print STDERR "stat($whiteclnt): $!\n";
    } elsif ($sb1[0] == $sb2[0] && $sb1[1] == $sb2[1]) {
	# ignore it if we are somehow working on the main file
    } else {
	while ($line = <MAINWHITE>) {
	    # skip everything except option settings
	    next if ($line !~ /^[ \t]*option[ \t]+/i);

	    # unrecognized option lines yield an empty list and are skipped
	    @parsed = parse_white_entry($line, 'option');
	    next if (! $parsed[1]);
	    ${$def_ref}{$parsed[0]} = "<B>$parsed[2]</B> <SMALL>by default in $main_whiteclnt</SMALL>";
	}
    }
    close(MAINWHITE);
}



# add an entry to our image of the file
#	$file_ref	reference to the list of [key, comment, line] triples
#	$dict_ref	reference to the hash of key => triple
#	$comment	comment lines that preceded the entry in the file
#	$line		the whiteclnt line itself, or "" for trailing comments
#   The first call for a file (when the list is still empty) also parses the
#   preamble out of $comment and pushes two leading triples, one for the
#   preamble and one reserved for option changes.
#   sets the globals:
#	$whiteclnt_version	    #webuser version ...
#	$whiteclnt_notify	    #webuser mail-notify=X mailbox=Y
#	$whiteclnt_notify_pat	    regex for #webuser mail-notify=X mailbox=Y
#	$whiteclnt_lock		    #webuser (un)locked
#	$whiteclnt_change_log	    list of dates when file was changed
sub add_white_entry {
    my($file_ref, $dict_ref, $comment, $line) = @_;
    my(@parsed);

    # trim white space that is invisible on the web form
    $line =~ s/[ \t]+\n/\n/;
    $comment =~ s/[ \t]+\n/\n/g;

    # Notice and deal with the preamble.
    #	The preamble consists of the comments that start the file.
    #	An empty list means this is the first entry of the file.
    if (! @$file_ref) {
	my($preamble, @buf, $got_parm);

	# remove the change-history, version, and parameters from the preamble
	#   Each loop below deletes every matching "#webuser" line from
	#   $comment and notes in $got_parm that a parameter was seen.
	$whiteclnt_version = "#webuser version 1.0\n";
	while ($comment =~ s/^#webuser version ([0-9.]+)[ \t]*\n//m) {
	    # for now, insist on version 1.0
	    html_whine("unrecognized version $1 in $whiteclnt")
	       if ($1 ne "1.0");
	    $got_parm = "yes";
	}

	# the last mail-notify line found wins; the default is "off"
	$whiteclnt_notify_pat = '(#webuser mail-notify=)(on|off)( mailbox=)([-_a-z0-9]*)';
	$whiteclnt_notify = "#webuser mail-notify=off mailbox=\n";
	while ($comment =~ s/^$whiteclnt_notify_pat[ \t]*\n//im) {
	    $whiteclnt_notify = "$1$2$3$4\n";
	    $got_parm = "yes";
	}

	# the file is unlocked unless at least one "locked" line is present
	$whiteclnt_lock = "#webuser unlocked\n";
	while ($comment =~ s/^#\s*webuser\s+unlocked\s*$//im) {
	    $got_parm = "yes";
	}
	while ($comment =~ s/^#\s*webuser\s+locked\s*$//im) {
	    $whiteclnt_lock = "#webuser locked\n";
	    $got_parm = "yes";
	}

	# the change log is the last "created" line followed by the most
	#   recent "changed" lines
	$whiteclnt_change_log = "";
	while ($comment =~ s/^#\s*webuser created\s+(.+)\s*$//im) {
	    $whiteclnt_change_log = "#webuser created $1\n";
	    $got_parm = "yes";
	}
	undef(@buf);
	while ($comment =~ s/^#webuser\s+changed\s+(.+)\s*$//im) {
	    push(@buf, "#webuser changed $1\n");
	    $got_parm = "yes";
	}
	# keep only the last 20 dates of change
	# NOTE(review): the slice $#buf-20 .. $#buf holds up to 21 lines,
	#   one more than the comment above says; confirm which is intended.
	if (@buf) {
	    my($start);
	    $start = $#buf-20;
	    $start = 0
		if ($start < 0);
	    $whiteclnt_change_log .= join('', @buf[$start .. $#buf]);
	}

	# Collect the non-parameter comments of the preamble, or everything
	#   through the last blank line before the first non-comment line.
	$preamble = "";
	$preamble .= $1 while ($comment =~ s/^(.*\n\n)//s);
	# everything is the preamble if there is no non-comment line
	#   or if the preamble seems to be empty
	if (($preamble eq "" && ! $got_parm)
	    || $line eq "" || $comment eq "\n") {
	    $preamble .= $comment;
	    $comment = "";
	}
	# squeeze trailing blank lines to one newline and drop leading ones
	$preamble =~ s/\n+\n$/\n/;
	$preamble =~ s/^\n+//;
	# the preamble triple has no key and a bare newline as its line
	push @$file_ref, [undef, $preamble, "\n"];

	# make a slot for option changes
	push @$file_ref, ["", undef, undef];

	# nothing else to add when the file held only a preamble
	return
	    if ($line eq "" &&  $comment eq "");
    }

    # If the line makes sense to use, remember where it will be.
    # Add it to the memory image of the file in either case.
    @parsed = parse_white_entry($line, '');
    if (! $parsed[1]) {
	# treat it like a comment if it makes no sense
	$comment .= $line;
	push @$file_ref, [undef, $comment, ""];
    } else {
	my($cur_key, $entry, $i, $k);

	# store the canonical line, not the text read from the file
	$cur_key = $parsed[0];
	$entry = [$cur_key, $comment, $parsed[1]];
	push @$file_ref, $entry;

	if (${$dict_ref}{$cur_key}) {
	    $i = 0;
	    # mark duplicate values for eventual deletion
	    #	keep the last setting in the file
	    #	The earlier entry moves to the first unused "DUP-n-key" slot.
	    while (${$dict_ref}{$k = "DUP-$i-$cur_key"}) {
		++$i;
	    }
	    ${$dict_ref}{$k} = ${$dict_ref}{$cur_key};
	}
	${$dict_ref}{$cur_key} = $entry;
    }
}



# canonicalize a whitelist checksum "type value" string
#	$value		checksum type followed by its value
#   Returns the list (type, value) with the type in canonical spelling and
#   the type removed from the value.  The type is "" and the value is only
#   stripped of trailing white space when the type is not recognized.
#   The four words of hex Body and Fuz checksums are separated by single
#   blanks in the result.
sub parse_type_value {
    my($value) = @_;

    # Check for type, but don't worry much about substitute types.
    #	    Don't support received checksums.
    #	    Body checksums must be hex.
    $value =~ s/\s+$//;
    if ($value =~ s/^IP:?\s+//ix) {
	return ("IP", $value);
    } elsif ($value =~ s/^env[-_]from:?\s+//ix) {
	return ("env_From", $value);
    } elsif ($value =~ s/^env[-_]To:?\s+//ix) {
	return ("env_To", $value);
    } elsif ($value =~ s/^from:?\s+//ix) {
	return ("From", $value);
    } elsif ($value =~ s/^message[-_]id:?\s+//ix) {
	return ("Message-ID", $value);
    } elsif ($value =~ s/^substitute\s+([-a-z_0-9]+)\s+//i) {
	# A single quantifier on the header name is enough.  Repeating the
	#   group as well only invites needless backtracking on bad input.
	return ("substitute $1", $value);
    } elsif ($value =~ s/^hex\s+body
			\s+([0-9a-f]{8})\s*([0-9a-f]{8})
			\s*([0-9a-f]{8})\s*([0-9a-f]{8})\s*$
			/$1 $2 $3 $4/ix) {
	return ("hex Body", $value);
    } elsif ($value =~ s/^hex\s+fuz([12]):?
			\s+([0-9a-f]{8})\s*([0-9a-f]{8})
			\s*([0-9a-f]{8})\s*([0-9a-f]{8})\s*$
			/$2 $3 $4 $5/ix) {
	# $1 is still the Fuz number captured by the substitution
	return ("hex Fuz$1", $value);
    } else {
	return ("", $value);
    }
}



# canonicalize a threshold setting
#	$_[0]		name of the checksum type
#	$_[1]		threshold value
#   Returns 1 and replaces both arguments IN PLACE with their canonical
#   forms if the setting is valid.  Returns 0 and leaves the arguments
#   alone if not.
sub parse_thold_value {
    my($want, $type, $val);

    # Check the name of the checksum against the list of checksum types
    #	that can have per-user thresholds, ignoring case and treating
    #	'-' and '_' as the same character.
    #	The name is compared as a string instead of being made into a
    #	pattern, because it comes from the file or the web form and might
    #	contain regular expression metacharacters.
    $want = lc($_[0]);
    $want =~ tr/_/-/;
    foreach my $ck (split(/,/, $thold_cks)) {
	my $have = lc($ck);
	$have =~ tr/_/-/;
	next if ($have ne $want);
	# use the spelling from the list
	$type = $ck;
	last;
    }
    return 0 if (!defined($type));

    # check the threshold value
    if ($_[1] =~ /^Never$/i) {
	$val = 'Never';
    } elsif ($_[1] =~ /^many/i) {
	# reputation threshold is a % and reputation total is finite
	return 0 if ($type =~ /^rep/);
	$val = "many";
    } elsif ($_[1] =~ /^\d+$/) {
	$val = $_[1];
	if ($type =~ /^rep$/i) {
	    # a bare number for a reputation is a percentage
	    return 0 if ($val > 100);
	    $val .= '%';
	}
    } elsif ($_[1] =~ /^(\d+)%$/) {
	# reputation threshold is a %
	return 0 if ($1 > 100 || $type !~ /^rep$/i);
	$val = $_[1];
    } else {
	return 0;
    }


    # hand the canonical forms back through the aliased arguments
    $_[0] = $type;
    $_[1] = $val;
    return 1;
}



# See if a whiteclnt line makes sense
#   If so, return a list of (key, line) canonicalized.
#	If it is an option setting, return a third string that is the value
#	for the edit form.
#   If not, return an error message as the only element of the list,
#	or an empty list for an unrecognized line in 'option' mode.
#   Callers tell the two apart by whether the second element is set.
sub parse_white_entry {
    my($line,					# line to parse
       $mode					# ''=accept from file,
						# 'option'=new option setting
						# 'strict'=new whitelist entry
       ) = @_;

    my($count, $key, $type, $value);

    # recognize options
    if (!$mode || $mode eq 'option') {
	return ("dccenable", "option dcc-on\n", "on")
	    if ($line =~ /^\s*option\s+DCC-on\s*$/i);
	return ("dccenable", "option dcc-off\n", "off")
	    if ($line =~ /^\s*option\s+DCC-off\s*$/i);

	return ("greyfilter", "option greylist-on\n", "on")
	    if ($line =~ /^\s*option\s+greylist-on\s*$/i);
	return ("greyfilter", "option greylist-off\n", "off")
	    if ($line =~ /^\s*option\s+greylist-off\s*$/i);

	return ("greylog", "option greylist-log-on\n", "on")
	    if ($line =~ /^\s*option\s+greylist-log-on\s*$/i);
	return ("greylog", "option greylist-log-off\n", "off")
	    if ($line =~ /^\s*option\s+greylist-log-off\s*$/i);

	return ("mtafirst", "option MTA-first\n", "first")
	    if ($line =~ /^\s*option\s+MTA-first\s*$/i);
	return ("mtafirst", "option MTA-last\n", "last")
	    if ($line =~ /^\s*option\s+MTA-last\s*$/i);

	return ("reps", "option DCC-reps-on\n", "on")
	    if ($line =~ /^\s*option\s+DCC-reps-on\s*$/i);
	return ("reps", "option DCC-reps-off\n", "off")
	    if ($line =~ /^\s*option\s+DCC-reps-off\s*$/i);

	return ("dnsbl", "option dnsbl-on\n", "on")
	    if ($line =~ /^\s*option\s+dnsbl-on\s*$/i);
	return ("dnsbl", "option dnsbl-off\n", "off")
	    if ($line =~ /^\s*option\s+dnsbl-off\s*$/i);

	return ("xfltr", "option xfltr-on\n", "on")
	    if ($line =~ /^\s*option\s+xfltr-on\s*$/i);
	return ("xfltr", "option xfltr-off\n", "off")
	    if ($line =~ /^\s*option\s+xfltr-off\s*$/i);

	return ("logall", "option log-all\n", "on")
	    if ($line =~ /^\s*option\s+log-all\s*$/i);
	return ("logall", "option log-normal\n", "off")
	    if ($line =~ /^\s*option\s+log-normal\s*$/i);

	return ("discardok", "option forced-discard-ok\n", "discard spam")
	    if ($line =~ /^\s*option\s+forced-discard-ok\s*$/i);
	return ("discardok", "option forced-discard-nok\n", "delay mail")
	    if ($line =~ /^\s*option\s+forced-discard-nok\s*$/i);

	if ($line =~ /^\s*option\s+threshold\s+(\S+),(\S+)\s*$/i) {
	    $type = $1;
	    $value = $2;
	    # parse_thold_value() canonicalizes $type and $value in place
	    return ("thold-$type", "option threshold $type,$value\n", "$value")
		if (parse_thold_value($type, $value));
	}

	# recognize old logging options
	return ("greylog", "option greylist-log-on\n", "on")
	    if ($line =~ /^\s*log\s+all-grey\s*$/i);
	return ("greylog", "option greylist-log-off\n", "off")
	    if ($line =~ /^\s*log\s+no-grey\s*$/i);

	# we are finished if only parsing a new option line we know is ok
	return if ($mode eq 'option');
    }

    return 'unrecognized option line'
	if ($line =~/^log/i || $line =~ /^option/i);

    return 'unrecognized DCC whitelist line'
	if ($line !~ /^(\S+)\s+(.*)/);
    $count = $1;
    $value = $2;

    # NOTE(review): this pattern is not anchored, so any count that merely
    #	contains "many" or "ok" is accepted; confirm that is intended.
    return "unrecognized DCC whitelist count \"$count\""
	if ($count !~ /many|ok|ok2/i);

    ($type, $value) = parse_type_value($value);
    return "unrecognized DCC whitelist value \"$value\"" if (!$type);

    if ($mode eq 'strict') {
	# be picky because we are checking for new whitelist entries
	#   from the form
	if ($value eq '') {
	    # the canonical type is spelled "Message-ID"
	    return "use a value of '<>' for missing or null Message-IDs"
		if ($type =~ /message[-_]id/i);
	    return "missing $type value";
	}

	# check the syntax of IP addresses and CIDR blocks
	#   should also look for conflicts and excessive CIDR blocks
	if ($type =~ /IP/i) {
	    my($addr, $bits, $colons, $quads);
	    $addr = $value;
	    if ($addr =~ s/(.*)\/(\d+)/$1/) {
		$bits = $2;
		return "$value is not a valid CIDR block"
		    if ($bits <= 0);
		# Judge every prefix on the 128-bit scale.  An IPv4 prefix
		#   counts bits of the last 32, so it is 96 bits longer there.
		#   An IPv6 prefix is already on that scale.
		$bits += 96 if ($addr !~ /:/);
		return "$value is not a valid CIDR block"
		    if ($bits > 128);
	    }
	    # it would be better to use some library to parse the IP address,
	    #	but in 2005, there is no Perl module that can handle IPv6
	    #	addresses and is almost always available
	    # hex digits may be in either case
	    return "$addr is not a valid IP address"
		if ($addr !~ /^[.:0-9a-f]+$/i);
	    if ($addr =~ /:/) {
		# IPv6
		#   count the colons and expand a "::" with zero words until
		#   there are 7 colons
		$colons = $addr;
		$colons =~ s/[^:]+//g;
		$colons = length($colons);
		if ($addr =~ /^::/ && $colons <= 7) {
		    ++$colons while ($colons < 7 && $addr =~ s/^::/::0:/);
		    $addr =~ s/^::/0:/ if ($colons == 7);
		} elsif ($addr =~ /::$/ && $colons <= 7) {
		    ++$colons while ($colons < 7 && $addr =~ s/::$/:0::/);
		    $addr =~ s/::$/:0/ if ($colons == 7);
		} else {
		    ++$colons while ($colons < 7 && $addr =~ s/::/::0:/);
		    $addr =~ s/::/:0:/ if ($colons == 7);
		}
		return "$value is not a valid IP address" if ($colons > 7);
		# strip the leading hex words; a final hex word is replaced
		#   by a dummy dotted quad for the IPv4 checks below
		$addr =~ s/^([0-9a-f]{1,4}:)+//i;
		$addr =~ s/^[0-9a-f]{1,4}$/127.0.0.1/i;
		return "$value is not a valid IP address" if ($addr =~ /:/);

		# stop looking at IPv6 address with either the IPv4 trailing
		# part or a fake 127.0.0.1
	    }
	    $quads = 0;
	    while ($addr =~ s/^(\d{1,3})\.//) {
		return "$value is not a valid IP address" if ($1 > 255);
		++$quads;
	    }
	    # what is left must be the final decimal number, so that things
	    #	like "1.2.3." and "1.2.3.ab" are not accepted
	    return "$value is not a valid IP address"
		if ($addr !~ /^\d{1,3}$/ || $addr > 255 || $quads > 3);
	}

	# we should now check for collisions among addresses
    }

    # dcc_str2ck() via dcc_parse_ck() ignores outside quotes and <>,
    # whitespace, and upper/lower case, and trailing periods.
    # So our key must also.  The value for the line in the file need not
    # be quite as clean
    $value =~ s/^\s+//;
    $value =~ s/\s+$//;
    # strip outside <> only if there were no outside quotes to strip
    $value =~ s/^<\s*(.+)\s*>$/$1/
	if ($value !~ s/^"\s*(.+)\s*"$/$1/);
    $value =~ s/\.+$//;

    # build the whiteclnt line
    $line = "$count\t$type";
    $line .= (length($type) < 8) ? "\t" : ' ';
    $line .= "$value\n";

    # the key has no white space and is all lower case
    $value =~ s/\s//g;
    $value =~ tr/A-Z/a-z/;
    $key = "$type $value";

    return ($key, $line);
}



# Check a whitelist entry proposed through the web form.
#	$comment	comment text to precede the entry
#	$count, $type, $value	the three parts of the whitelist line
#   Returns a one element list holding an error message if the entry is
#	bogus, or the list (key, comment, line) if it makes sense.
sub ck_new_white_entry {
    my($comment, $count, $type, $value) = @_;
    my(@checked, @entry);

    # all of the form fields must have been POSTed
    for my $field (qw(count type val comment)) {
	if (!defined($query{$field})) {
	    return "broken POST values without $field\n";
	}
    }

    # Tidy the comment:
    #	no white space at the ends of lines,
    $comment =~ s/[ \t\r]+\n/\n/g;
    #	a '#' in front of every line that has text but lacks one,
    $comment =~ s/^([ \t]*[^# \t\n])/#$1/gm;
    #	and no blank lines at the end, but exactly one final newline
    #	unless the comment is empty.
    $comment =~ s/\s+$//s;
    $comment .= "\n" unless (length($comment) == 0);

    # a failed check yields only an error message
    @checked = parse_white_entry("$count $type $value", 'strict');
    return ($checked[0]) unless (defined($checked[1]));

    @entry = ($checked[0], $comment, $checked[1]);
    return @entry;
}



# add, change, or delete a whitelist entry
#	$file_ref	reference to the list that is the image of the file
#	$dict_ref	reference to the hash of key => entry
#	$cur_key	key of the entry to add, change, or delete
#	$entry_ref	reference to the new (key, comment, line) triple,
#			or false to delete the entry
#   Returns undef after the file has been rewritten and read back,
#	or an error message, including when the file is locked.
sub chg_white_entry {
    my($file_ref, $dict_ref, $cur_key, $entry_ref) = @_;
    my($msg, $i, $k);

    return "$whiteclnt locked" if ($whiteclnt_lock =~ /\blocked/);

    if (!${$dict_ref}{$cur_key}) {
	if ($entry_ref) {
	    # add it to the list that will go to the disk if new
	    #	The dictionary holds references to the triples, so store
	    #	the reference and not the array, which in scalar context
	    #	would be only the number of its elements.
	    ${$dict_ref}{$cur_key} = $entry_ref;
	    push @$file_ref, $entry_ref;
	}

    } else {
	# changing or deleting existing entry, so delete duplicates
	#   an entry with an undefined comment is not written to the file
	$i = 0;
	while (${$dict_ref}{$k = "DUP-$i-$cur_key"}) {
	    ${$dict_ref}{$k}[1] = undef;
	    ++$i;
	}

	if (!$entry_ref) {
	    # delete an entry
	    ${$dict_ref}{$cur_key}[1] = undef;

	} else {
	    # change an entry
	    #	overwrite the triple itself so that the list sees the change
	    @{${$dict_ref}{$cur_key}} = @$entry_ref;
	}
    }

    # put the changes on the disk
    $msg = write_whiteclnt(@$file_ref);
    return $msg if ($msg);

    # set the web form that includes the response
    read_whiteclnt($file_ref, $dict_ref);
    return undef;
}



# write a new version of the file
#	@_	the image of the file as a list of references to
#		(key, comment, line) triples
#   The current contents of the file open on WHITECLNT are first copied to a
#   new whiteclnt.bakNNNNNN file in $user_dir for undoing the change.
#   Entries with an undefined comment are deleted entries and not written.
#   The #webuser parameter lines and a new "changed" date follow the comment
#   of the first entry written, which is the preamble.
#   Failure to read $user_dir is reported with html_whine().
sub write_whiteclnt {		# return undef or error message
    my(@file) = @_;
    local(*DIR, *BAK);
    my(@baks, $bak, $buf, $entry,  $preamble);

    # delete old backup files and find the name of the next one
    # keep only the last few and fairly recent revisions
    opendir(DIR, "$user_dir") or html_whine("opendir($user_dir): $!");
    @baks = map("$user_dir/$_",
		sort grep {/^(whiteclnt\.bak\d+$)/ && -f "$user_dir/$1"}
			readdir(DIR));
    closedir(DIR);
    # discard the oldest while more than two remain and it is at least
    #	a day old or there are 20 or more
    while ($#baks > 1 && ($baks[0] =~ /(.*\/whiteclnt\.bak\d+$)/)
	   && ((-M $1) >= 1 || $#baks >= 19)) {
	unlink $1;			# suppress taint warning
	shift(@baks);
    }
    if ($#baks >= 0) {
	# number the new backup one past the newest
	$baks[$#baks] =~ /\/whiteclnt\.bak(\d+)$/;
	$bak = sprintf("%s/whiteclnt.bak%06d", $user_dir, $1+1);
    } else {
	$bak = "$whiteclnt.bak000000";
    }

    # create the undo file and copy the real file to it
    #	It might be smoother to rename the current file, but we might
    #	not have permission to create the new file with the correct owner.
    #	There are also dangers with symbolic links and rename().
    return "cannot create $bak: $!"
	if (!sysopen(BAK, $bak, O_WRONLY | O_CREAT | O_EXCL, 0660));
    return "seek($whiteclnt): $!"
	if (!seek(WHITECLNT, 0, 0));
    while (read(WHITECLNT, $buf, 8*1024)) {
	return "write($bak): $!"
	    if (!syswrite(BAK, $buf));
    }
    # be sure the backup is complete before the real file is emptied
    return "close($bak): $!"
	if (!close(BAK));

    # rewrite the real file
    return "seek($whiteclnt): $!"
	if (!seek(WHITECLNT, 0, 0));
    return "truncate($whiteclnt): $!"
	if (!truncate(WHITECLNT, 0));

    $preamble = 0;
    foreach $entry (@file) {
	next if (!defined($$entry[1]));		# skip deleted entries

	print WHITECLNT $$entry[1];
	# output the parameters in the end of the preamble
	if (! $preamble) {
	    $preamble = 1;
	    print WHITECLNT $whiteclnt_version;
	    print WHITECLNT $whiteclnt_notify;
	    print WHITECLNT $whiteclnt_lock;
	    print WHITECLNT $whiteclnt_change_log;
	    print WHITECLNT strftime("#webuser changed %x %X%n", localtime);
	}
	print WHITECLNT $$entry[2];
    }

    # The callers treat any true value as an error message, and the value
    #	of a sub that ends with a loop is unspecified, so say "no error"
    #	explicitly.
    return undef;
}



# undo the most recent operation by copying from the newest backup
#   The file open on WHITECLNT is replaced with the contents of the backup
#   named by newest_whiteclnt_bak(), and the backup is then deleted.
#   Returns undef if that worked, or a message saying why nothing or not
#   everything was undone, including when the file is locked.
sub undo_whiteclnt {
    my($bak, $buf, $len, $contents);
    local(*BAK);

    return "$whiteclnt locked" if ($whiteclnt_lock =~ /\blocked/);

    $bak = newest_whiteclnt_bak();
    return "nothing undone"
	if (! $bak);

    return "open($bak): $!"
	if (!sysopen(BAK, $bak, O_RDONLY, 0));

    # Get all of the backup before touching the real file, so that
    #	trouble reading it cannot leave the real file empty or cut short.
    #	read() yields 0 at the end of the file but undef for an error.
    $contents = "";
    while ($len = read(BAK, $buf, 8*1024)) {
	$contents .= $buf;
    }
    return "read($bak): $!"
	if (!defined($len));
    close(BAK);

    return "seek($whiteclnt): $!"
	if (!seek(WHITECLNT, 0, 0));
    return "truncate($whiteclnt): $!"
	if (!truncate(WHITECLNT, 0));
    return "write($whiteclnt): $!"
	if (length($contents) != 0 && !print(WHITECLNT $contents));

    # the backup has been used and must not be used again
    return "unlink($bak): $!"
	if (!unlink($bak));

    return undef;
}



# find the newest backup file
#   Returns the untainted path of the whiteclnt.bakNNNNNN file in $user_dir
#   whose name sorts last, or undef if $user_dir cannot be read, there is
#   no backup file, or that backup is a day or more old.
sub newest_whiteclnt_bak {
    local(*DIR);
    my(@baks, $bak);

    opendir(DIR, "$user_dir") || return undef;
    # Accept only names that end with the digits, as write_whiteclnt()
    #	does, so that a stray file such as "whiteclnt.bak000002~" cannot
    #	sort last and hide the real backups.
    @baks = sort grep {/^whiteclnt\.bak\d+$/ && -f "$user_dir/$_"}
		    readdir(DIR);
    closedir(DIR);

    return undef
	if ($#baks < 0);
    $bak = "$user_dir/$baks[$#baks]";
    # do not undo with something old
    return undef
	if (-M $bak >= 1);
    return undef				# suppress taint warning
	if ($bak !~ /(.*\/whiteclnt\.bak\d+$)/);
    return $1;
}
