#!/usr/bin/perl -w

# MimeFilter --- Strip unwanted MIME attachments from a message
#
# Copyright (C) 2000-2018 Davide Giovanni Maria Salvetti
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# On Debian GNU/Linux System you can find a copy of the GNU General Public
# License in "/usr/share/common-licenses/GPL".

##########################################################################
# This is a self-documenting script: try and run it with -h or --help to #
# read its manpage.  Of course, you may also use man if it is available. #
##########################################################################

# FLAG: debug mode
BEGIN { $::DEBUG = 0, $SIG{'__WARN__'} = sub { warn $_[0] if $::DEBUG } }

# Read in config files: system wide first, then directory.
my $read = 0;
foreach my $config ("/etc/mimefilter.rc", "mimefilter.rc") {
    if (-r $config) {
	unless ($return = do $config) {
	    warn "couldn't parse $file: $@" if $@;
	    warn "couldn't do $file: $!"    unless defined $return;
	    warn "couldn't run $file"       unless $return;
	} else { $read++; }
    }
}
die "sorry, no valid configuration file found." unless $read;

# we want to END before dying
use sigtrap qw(die normal-signals);
use strict;
# east to west, you do or die
use Fatal qw(open close mkdir);
use Mail::Address;
use MIME::Parser;
# we're good netizens
use Text::Wrap qw(fill $columns); $columns = 72;

while ($_ = shift) {
    next unless /^-h|--help$/;
    use Pod::Text;
    open(\*POD, '| pod2text');
    select(POD);
    print while <::DATA>;
    close(\*POD);
    exit 0;
}

# how many accepted parts?
my $ok = 0;

# holds the MIME types of stripped parts
my @cassate = ();

# the following stack tell us if we are enclosed in a multipart/ or message/
# entity (@altstack <> ()), and what type is it: if it is message/
# $altstack[$#altstack] is undefined, if it is multipart/alternative
# $altstack[$#altstack] is defined and false, otherwise it is non alternative
# multipart/ and $altstack[$#altstack] is defined and true

# stack tracing what entity we're enclosed in
my @altstack = ();

# recursive function: filters an entity
sub filtra {
    my $entity = shift;

    # some times a multipart entity has just one part
    $entity->make_singlepart;

    # message entity are similar to multipart ones
    if ($entity->is_multipart or $entity->mime_type =~ '^message/') {
	if ($entity->mime_type =~ '^message/') {
	    # tell apart message from multipart
	    push @altstack, undef;
	} else {
	    push @altstack,
	    # tell apart message/alternative from other multiparts
	    ($entity->mime_type =~ '^multipart/alternative')?0:1;
	}

	# this brand new entity will be filled with ok parts
	my $cleaned = new MIME::Entity;
	# give it the old one header
	$cleaned->head($entity->head);

	foreach ($entity->parts) {
	    # this is a recursive filter
	    my $part = &filtra($_);
	    # add ok parts or their replacements;
	    $cleaned->add_part($part) if $part;
	}

	# we may have ended with just one part
	$cleaned->make_singlepart;

	# WARNING: WE HAVE TO POP ANYWAY!
	unless (defined(pop @altstack) || $cleaned->parts) {
	    # if we are here, we are in a (now) empty message
	    # dropped message MIME type
	    push @cassate, $cleaned->mime_type;

	    my @messaggio = @::messaggio;
	    # uso $cassate[$#cassate] e non $entity->mime_type
	    s/_TIPOMIME_/$cassate[$#cassate]/ foreach @messaggio;
	    @messaggio = fill("", "", @messaggio);

	    return build MIME::Entity(
				      Type           => 'text/plain',
				      Encoding       => '-SUGGEST',
				      Data           => \@messaggio,
				      Description    => $::descrizione,
				      Top            => 0,
				      );
	}

	return $cleaned;
    }

    if (grep { $entity->mime_type =~ m,^$_$, } @::ammessi
	and not
	grep { $entity->mime_type =~ m,^$_$, } @::nonammessi) {
	if (grep !/^$/, $entity->bodyhandle->as_lines) {
	    # it should have at least a non empty line
	    $ok++;
	    return $entity;
	} else {
	    # otherwise we just ignore it
	    return undef;
	}
    }

    # dropped entity MIME type
    push @cassate, $entity->mime_type;

    if ($altstack[$#altstack]) {
	# neither in a multipart/alternative, nor in a message/
	my @sostituto = @::sostituto;
	# uso $cassate[$#cassate] e non $entity->mime_type
	s/_TIPOMIME_/$cassate[$#cassate]/ foreach @sostituto;
	@sostituto = fill("", "", @sostituto);
	push @sostituto, "\n";

	return build MIME::Entity(
				  Type           => 'text/plain',
				  Encoding       => '-SUGGEST',
				  Data           => \@sostituto,
				  Description    => $::descrizione,
				  Top            => 0,
				  );
    }
    return undef;
    # in a multipart/alternative or in a (top level) message, either nested or
    # not, we simply drop it
}

# just a convenient abbreviation
sub messaggio {
    my @messaggio = shift;
    my $ord = 'A';

    push @messaggio, @::tagliato if @cassate;
    push (@messaggio, ("    " . $ord++ . ". $_\n")) foreach @cassate;

    return @messaggio;
}

# rename a given Field to Old-Field
sub rinomina {
    my $entity = shift;
    my $field = shift;

    return undef unless $entity->head->get($field);

    $entity->head->combine($field);
    $entity->head->add("Old-$field", $entity->head->get($field));
    $entity->head->delete($field);
    return 1;
}


# The code here has mostly been stolen from Mail::Internet, as the good ol'
# reply method doesn't seem to work anymore with woody MIME::Tools.
# Oh, well...
sub replica {
    my $original = shift;
    my $reply = new MIME::Entity;

    # Take care of the Subject.
    my $subject = $original->head->get('Subject') || "";
    $subject = "Re: " . $subject if($subject =~ /\S+/ && $subject !~ /Re:/i);
    $reply->head->replace('Subject',$subject);

    # Locate who are we sending to.
    my $to = $original->head->get('Reply-To')
	|| $original->head->get('From')
	    || $original->head->get('Return-Path')
		|| "";
    # Mail::Address->parse returns a list of refs to a 2 element array.
    my $sender = (Mail::Address->parse($to))[0];
    my $name = $sender->name;
    my $id = $sender->address;
    # Ensure we do have a name.
    unless(defined $name) {
	my $fr = $original->head->get('From');
	$fr = (Mail::Address->parse($fr))[0] if(defined $fr);
	$name = $fr->name if(defined $fr);
    }
    $reply->replace('To', $id);

    # Take care of the references.
    my $refs = $original->head->get('References') || "";
    my $mid = $original->head->get('Message-Id');
    $refs .= " " . $mid if(defined $mid);
    $reply->replace('References',$refs);

    # Take care of the In-Reply-To field.
    my $date = $original->head->get('Date');
    my $inreply = "";
    if(defined $mid) {
	$inreply  = $mid;
	$inreply .= " from " . $name if(defined $name);
	$inreply .= " on " . $date if(defined $date);
    } elsif(defined $name) {
	$inreply = $name . "'s message";
	$inreply .= "of " . $date if(defined $date);
    }
    $reply->replace('In-Reply-To', $inreply);

    # We're done.
    return $reply;
}

# send a given message entity
sub invia {
    my $entity = shift;
    my $to = shift;
    my $xdiagnostic = shift;

    if ($to) {
	# we're cc'ing, so to speak
	&rinomina($entity, 'Date');
	&rinomina($entity, 'Return-Receipt-To');
	&rinomina($entity, 'Read-Receipt-To');
	&rinomina($entity, 'Acknowledge-To');
    }

    $entity->head->add('X-Diagnostic', $xdiagnostic) if $xdiagnostic;

    ### Qui si potrebbe anche usare | $SENDMAIL $sendmailOPT $sendmailOPTp,
    ### prendendo tutto dall'ambiente, se si gira sotto Smartlist.
    my $sendmail = '| /usr/lib/sendmail -f' .
	$ENV{'listreq'} . ' -i ' . ($to?$to:'-t');

    $::DEBUG || open(\*MAIL, $sendmail);
    $entity->print($::DEBUG?\*STDOUT:\*MAIL);
    $::DEBUG || close(\*MAIL);
    return 1
}

# Existing unwritable files with the same name as the one the parser would like
# to use would cause the parsing to fail.  To guard against this, we use a
# private directory and clean it before use (i.e., /tmp is not a good choice to
# store attachments); we create it under /var/list/<list> (where archived
# message already are stored).

# the directory attachment will go into
my $outdir = "tmp.mimefilter-$$";

# Since we work in a private space (under /var/list/<list>/), we assume it's
# safe to wipe out $outdir: it may be had left here by a previous mimefilter
# died before unlinking it, or it may be here because something else, we just
# don't (and shouldn't) care.

# $outdir had better not to exist
system "rm -rf $outdir" if -e $outdir;
# create our _private_ directory
mkdir ($outdir, 0700) and my $rmdir = 1;
# don't forget to clean it when we're done!
END { rmdir $outdir or warn "rmdir $outdir: $!" if defined($rmdir); }

my $parser = new MIME::Parser(output_dir => $outdir);
# recursive parsing on 'message/' types
$parser->extract_nested_messages('NEST');

# parse the message
my $original = $parser->read(\*STDIN);

unless ($original) {
    # parsing failed
    # top level MIME header
    my $header = $parser->last_head;
    my $reply = &replica($header);
    $reply->head->replace('From', $::from);
    $reply->head->add('X-Loop', $ENV{'listaddr'});
    $reply->bodyhandle(new MIME::Body::InCore(\@::invalido));
    $::DEBUG?$reply->print(\*STDERR):&invia($reply);

    # Smartlist will recover the original message for us
    die "MIME parsing failed";
}

my $processed = &filtra($original);

if ($processed && $ok) {
    # we have at least a good part to send
    $processed->print;
    if (@cassate) {
	my $reply = &replica($original);
	$reply->head->replace('From', $::from);
	$reply->head->add('X-Loop', $ENV{'listaddr'});
	$reply->bodyhandle(new MIME::Body::InCore([&messaggio(@::mutilato)]));
	$::DEBUG?$reply->print(\*STDERR):&invia($reply);
	&invia($original, $ENV{'maintainer'}, "Message cleaned (@cassate)")
	    if $ENV{'filter_mime_cc_maintainer'} =~ /y/i and $ENV{'maintainer'};
    }
} else {
    # we ended with an empty message
    my $reply = &replica($original);
    $reply->head->replace('From', $::from);
    $reply->head->add('X-Loop', $ENV{'listaddr'});
    $reply->bodyhandle(new MIME::Body::InCore([&messaggio(@::vuoto)]));
    $::DEBUG?$reply->print(\*STDERR):&invia($reply);
    &invia($original, $ENV{'maintainer'}, "Empty message (@cassate)")
	if $ENV{'filter_mime_cc_maintainer'} =~ /y/i and $ENV{'maintainer'};
}

# don't forget to clean up $outdir
$original->purge;

# make Procmail happy
exit 0;

__END__

=head1 NAME

mimefilter - filter a MIME message stripping unwanted MIME parts

=head1 SYNOPSIS

mimefilter [OPTIONS]

=head1 DESCRIPTION

The B<mimefilter> script accept on STDIN a MIME conforming message, and outputs
on STDOUT a MIME conforming message.

It strips every unwanted MIME part, warning by email the original author about
this, and outputs a MIME compliant cleaned message, to be further processed by
a mailing list software.

You may find it useful if you don't want certain attachments on your mailing
lists, or if you want to allow just the text part from multipart/alternative
messages, and so on.  You can easily fine tune the list of allowed MIME types
to suit your particular needs, using normal Perl regexps.

=head1 OPTIONS

The B<mimefilter> script may take just an option, in either its short or long
form:

=over 4

=item I<-h>, I<--help>

Causes the script to print this very manpage and then succesfully exit.

=back

However, the B<mimefilter> script won't bark at you if it discovers you
supplied some other options as well, it'll just politely ignore them.

=head1 ARGUMENTS

The B<mimefilter> script cheerfully takes an unlimited number of command line
arguments and happily discards them all.

=head1 FILES

The B<mimefilter> script will look for a system wide configuration file in
F</etc/mimefilter.rc>, and for a local, per working directory, configuration
file in F<./mimefilter.rc>.  The latter may be used to override any or all of
the parameters defined by the former, thus allowing easily per mailing list
customization.

Several configuration parameters are provided, the most important being the
list of admissible MIME types (where Perl regexps may be used), along with the
list of never to be allowed ones (so that you may even specify, e.g., 'text/.*'
in the admissible types list and 'text/html' in the never to be allowed one, to
allow every text part but html ones).

See the default configuration file for examples of use and further
documentation.

=head1 ENVIRONMENT

The B<mimefilter> script will look for the following environment variables:

=over 4

=item I<list>

The name of the mailing list this message is intended for.  Used as the return
address of the warning issued to the orginal author if the message is not
already clean.

The Smartlist mailing list software will automatically pass this variable to
B<mimefilter>.

=item I<listaddr>

The address of the mailing list this message is inteded for.  Used in the
B<X-Loop> field of the warning issued to the original author if the message is
not already clean.

The Smartlist mailing list software will automatically pass this variable to
B<mimefilter>.

=item I<listreq>

The administrative (owner) address of the mailing list this message is inteded
for.  Used in the return address of the warning issued to the original author if
the message is not already clean.

The Smartlist mailing list software will automatically pass this variable to
B<mimefilter>.

=item I<maintainer>

The email address of the maintainer of the mailing list this message is inteded
for.  If it is defined, it is used to send the maintainer original carbon copies
of messages that have been modified by this filter -- if
I<filter_mime_cc_maintainer> is affermative, of course.

The Smartlist mailing list software will automatically pass this variable to
B<mimefilter>.

=item I<filter_mime_cc_maintainer>

A boolean flag: if affermative (i.e., if it matches the /y/i Perl regular
expression), the B<mimefilter> script will send carbon copies of every cleaned
(modified) message to the maintainer of the mailing list the message is intended
for.

Users of the Smartlist mailing list software may conveniently set this variable
in rc.custom.

=item I<filter_mime>

The B<mimefilter> script itself will pay no attention to this variable, but if
you have followed what the author suggests in
L<the RECOMMENDED USE WITH THE SMARTLIST MAILING LIST SOFTWARE section|"RECOMMENDED USE WITH THE SMARTLIST MAILING LIST SOFTWARE">,
you will need to define it affermative in rc.custom to activate this script:

	filter_mime = yes

=back

=head1 RETURN VALUE

The B<mimefilter> script returns 0 on success and a positive integer on errors.

=head1 RECOMMENDED USE WITH THE SMARTLIST MAILING LIST SOFTWARE

Put the following lines in rc.local.s[012]0 (the right one just depends on your
specific needs, look at rc.submit for more info):

    :0
    * filter_mime ?? y
    {
	    # Pass the mail trough mimefilter
	    :0 fw
	    | mimefilter

	    # Executed if mimefilter died
	    :0 e
	    {
		    :0 hfw
		    | formail -A "X-Diagnostic: MIME filtering failed"

		    HOST=continue_with_rc.request
	    }

	    # Trash empty messages (author's already been warned by mimefilter)
	    :0 Bh
	    * < 1
	    /dev/null
    }

Also remember to uncomment the appropriate line in rc.custom, to activate
rc.local.s[012]0, and don't forget to customize the list of admissibile and
never to be allowed MIME types in the configuration file(s).

=head1 USING THIS SCRIPT WITH OTHER MAILING LIST SOFTWARE

The author believes no particular arrangements are necessary to use this script
with mailing list software other than Smartlist, altough one should remember
that B<mimefilter> expects to find at least the B<list>, B<listaddr>, and
B<listreq> environment variables set.

=head1 SEE ALSO

L<The Smartlist mailing list software documentation|smartlist>, L<the
mimefilter.rc(5) man page (yet to be written)|mimefilter.rc>.

=head1 BUGS

Naaa... ;-)

=head1 UNRESTRICTIONS

This program is copylefted.  Refer to the GNU General Public License for
conditions of use.

=head1 AUTHOR

This program has been written and is actively maintained by S<Davide Giovanni
Maria> Salvetti, E<lt>salve@linux.itE<gt>.

=head1 HISTORY

This script was originally aimed for use with a bunch of Smartlist served
maling lists the author administers.  He believes it can be successfully used
with other mailing list softwares as well.

=cut
