#!/usr/bin/perl -w
use strict;
use warnings;

# download: perl script to control the connection to the remote
#           site to download and otherwise manage remote email.
#
# Usage: download [-h] [-m] [-l] [-D N] [-p]
#     -h     Give help (usage) message
#     -l     Local only
#            If -l is set, we just do things locally.  If it isn't
#            set, we connect to the remote site and download mail.
#            NOTE(review): older wording of this help text said the
#            opposite; the description here follows what the code does
#            ($connect is 0 when -l is given).
#     -m     Suppress Move/Shuffle of mail
#            We get the date/time of now, so that we can delete mail
#            remotely.  If we have possibly downloaded remote mail,
#            we shuffle mail around -- unless this is set.
#            NOTE(review): older wording said the shuffle happens
#            *with* -m set; the code does the shuffle when -m is absent.
#     -p     Don't check for pppd running, line is already up.
#            (The line is then neither brought up nor shut down by us.)
#     -D N   Set Debug level to N (not used yet)
#            NOTE(review): when shuffling, a true value given here is
#            currently used in place of ParseDate("today") and must look
#            like YYYYMMDDhh:mm:ss, otherwise the script dies.  Confirm
#            whether that is a deliberate testing hook.
#
# We intend to run this program under control of cron, probably
# every hour or so.  However, if pppd is active when this program
# starts, we will exit and do nothing.  Either I am downloading
# stuff and using up all the bandwidth, or I am online and can
# manage the remote mail spool manually.  We can tell whether
# pppd is active, by looking for the presence of the /var/run/ppp0.pid
# file.
#
# Calls /usr/bin/pon, ~/bin/rmmail, /usr/bin/poff, ~/bin/mergebox,
# and /bin/mv, in that order.
#
#   ~/bin/rmmail   - Will process files placed in ~/rmmail/FTP_WORK (if
#                    present, and called with -F switch), but basically
#                    downloads remote mail to subdirs of ~/rmmail.  Downloaded
#                    FTP files will eventually be processed as would remote
#                    POP or IMAP email, and is split into multiple folders
#                    under ~/rmmail/folders
#   ~/bin/mergebox - The directories this program accesses are completely
#                    controlled by command line arguments.  Here, it will
#                    read files from ~/rmmail/folders, ~/rmmail/FTP_WORK (which
#                    should almost always be empty when mergebox is run from
#                    here) and writes to ~/Mail, and reads from ~/Mail and
#                    writes to ~/Archives/Mail.
#
# ====================================================================
# I receive most (all) email at a remote site.  It ends up in a single
# mail folder, but could be multiple (IMAP supports this, not POP).
# I am going to download all of this for local storage, but leave stuff
# remote for some period of time (some remote email places will delete
# email as soon as it thinks you have a copy, mine doesn't do this).
# As I download it, I will analyse and split the email into multiple
# folders, which is more convenient for local browsing.  Especially if
# one is using a threaded mail reader.  I may delete local copies of
# messages manually, as spam filters and what not can't be perfect.
# So err on the side of not deleting a message as spam.  After some
# length of time, it isn't likely I need to access a message, but it
# might be nice to get at it as a generic INBOX as opposed to split
# into topics.  So, keep email in my split input files for about 3 weeks,
# upon which time it is gathered into a INBOX for another 3 weeks,
# before it is archived.  My download program puts the mail into folders
# under ~/rmmail, and my local mail clients look in ~/Mail.
#
#   FreeNet/Mail/INBOX        ---- (rmmail)   ----> ~/rmmail/folders/$TOPIC
#   ~/rmmail/folders/$TOPIC   ---- (mergebox) ----> ~/Mail/$TOPIC  (-3 weeks)
#   ~/Mail/$TOPIC             ---- (mergebox) ----> ~/Mail/INBOX   (+3 -6 weeks)
#   ~/Mail/$TOPIC             ---- (mergebox) ----> ~/Archives/Mail/$TOPIC
#   ~/Mail/INBOX              ---- (mergebox) ----> ~/Archives/Mail/INBOX
#
# On the sent-mail side of things
#
#   FreeNet/Mail/sent-mail    ---- (rmmail)   ----> ~/rmmail/FTP_WORK/sent*
#   FreeNet/Mail/sent-mail-*  ---- (rmmail)   ----> ~/rmmail/FTP_WORK/sent-*
#   ~/rmmail/FTP_WORK/sent*   ---- (mergebox) ----> ~/Mail/sent-mail
#   ~/rmmail/FTP_WORK/sent*   ---- (/bin/cp)  ----> ~/Mail/sent-mail-$DATE
#   ~/Mail/sent-mail-$DATE    ---- (/bin/cp)  ----> ~/Archives/Mail/sent*
# ====================================================================

use Getopt::Std;
use Digest::MD5 qw(md5_base64);    # To calculate a Message-ID for init.
use Mail::Internet;
use Date::Manip;

# Option flags.  These must be package globals (not lexicals) because
# getopts() stores each switch in $main::opt_<letter>.
our ( $opt_h, $opt_m, $opt_l, $opt_D, $opt_p );

# File-wide state.  $date_m3w and $date_m6w are read by shuffle_local().
my ( $date_now, $date_m3w, $date_m6w, $connect, $shuffle );

getopts('hmlD:p');
usage() if ($opt_h);

# Both steps are ON by default (the plain cron invocation); the
# switches turn them off.
$connect = $opt_l ? 0 : 1;
$shuffle = $opt_m ? 0 : 1;

if ($shuffle) {
    # Get timestamp, remove hours/minutes/seconds to make a date.
    # Calculate minus-3-weeks and minus-6-weeks dates.  Format
    # as valid Date::Manip timestamps.
    $date_now = $opt_D ? $opt_D : ParseDate('today');    # YYYYMMDDhh:mm:ss

    my $today_midnight;
    if ( $date_now =~ /^(\d{8})\d\d:\d\d:\d\d$/ ) {
        $today_midnight = $1 . '00:00:00';
    } else {
        die "Can't understand ParseDate(today)=($date_now)\n";
    }

    # Six weeks back is simply three weeks before the three-week mark.
    $date_m3w = DateCalc( $today_midnight, '- 3 weeks' );
    $date_m6w = DateCalc( $date_m3w,       '- 3 weeks' );
}

# This runs by cron.  Set $HOME just in case.
$ENV{HOME} = '/home/ghaverla';

# We run some executables.  Set up complete pathnames to them.
my $RMMAIL   = "$ENV{HOME}/bin/rmmail";
my $MERGEBOX = "$ENV{HOME}/bin/mergebox";

# We have a few directories we are interested in.
my $RMM_DIR = "$ENV{HOME}/rmmail/folders";
my $FTP_DIR = "$ENV{HOME}/rmmail/FTP_WORK";
my $MAILDIR = "$ENV{HOME}/Mail";
my $AR_DIR  = "$ENV{HOME}/Archives/Mail";

# ================================================================
# We can now start up our PPP connection to remote site, and process
# the email there.  All of the above could take a while, so check on
# pppd again. :-)
download_remote() if ($connect);

# ================================================================
# Now we can shuffle mail around locally.
# ================================================================
shuffle_local() if ($shuffle);

# All done!
exit(0);

# ============================================================
# Routine stolen from mergebox.
# This overwrites any pre-existing mbox with name $folder.  The date
# is the beginning of the UNIX epoch, all email is received after
# that date, so this should always be first in a sorted mbox.
# write_init_mbox( $dest_mbox )
#   Writes an mbox file at path $dest_mbox holding one fixed stub
#   message, replacing whatever was there.  Dies if the file cannot be
#   opened or fully written.  Currently no live caller in this script
#   (the calls in shuffle_local are commented out).
sub write_init_mbox {
    my $dest_mbox = shift;

    # These strings feed the MD5 below; changing a single character
    # changes the stub's Message-ID, so leave them exactly as they are.
    my $hdr_lines = [
        'From nobody@nowhere.org',
        'From: nobody@nowhere.org',
        'To: nobody@nowhere.org',
        'Date: Thur, 1 Jan 1970 00:00:01 -0700 (MST)',
        'Subject: Dummy Message for Mbox',
    ];
    my $body_lines = [
        "Don't delete this message, I need a stub message.\n",
    ];

    my $hdr = Mail::Header->new( MailFrom => "KEEP" );
    $hdr->extract($hdr_lines);

    # First pass: render the message without a Message-ID so the ID can
    # be derived from its content.
    my $email = Mail::Internet->new( Header => $hdr, Body => $body_lines );
    my $ID    = md5_base64( $email->as_mbox_string );  # Note, this is a constant.
    $hdr->add( 'Message-ID', $ID );

    # Second pass: render again, now including the Message-ID header.
    $email = Mail::Internet->new( Header => $hdr, Body => $body_lines );
    my $email_as_string = $email->as_mbox_string;

    open( my $mbox, '>', $dest_mbox )
        or die "Can't open $dest_mbox to write initial message to: $!\n";
    print {$mbox} $email_as_string;
    close($mbox)
        or die "Can't finish writing $dest_mbox: $!\n";
    return;
}

# usage()
#   Placeholder for the help text; always dies.
sub usage {
    die "No usage yet. Sorry.\n";
}

# _run_quiet( $program, @args )
#   Runs an external program and throws away whatever it prints on
#   stdout (stderr is left alone).  With arguments, the program is
#   exec'ed directly from the list, so odd characters in a folder name
#   are never interpreted by a shell.  Returns 1 if the program ran and
#   exited with status 0; otherwise prints a warning and returns 0.
sub _run_quiet {
    my @cmd = @_;

    my $pid = open( my $out, '-|', @cmd );
    if ( !defined $pid ) {
        warn "Can't run '@cmd': $!\n";
        return 0;
    }

    # Drain the pipe so the child never blocks on a full pipe; the
    # output itself is not wanted.
    while ( defined( my $discard = <$out> ) ) { }

    # close() on a pipe waits for the child and reports its status.
    if ( !close($out) ) {
        my $why = $! ? "error closing pipe: $!" : "wait status $?";
        warn "Command '@cmd' failed ($why)\n";
        return 0;
    }
    return 1;
}

# _list_dir( $dir )
#   Returns every entry name in directory $dir (including '.' and '..'),
#   or dies if the directory cannot be opened.
sub _list_dir {
    my $dir = shift;

    opendir( my $dh, $dir ) || die "Can't open dir $dir: $!\n";
    my @entries = readdir($dh);
    closedir($dh);
    return @entries;
}

# download_remote()
#   Brings the PPP line up (unless -p was given), runs rmmail in POP
#   mode, then takes the line down again (unless -p was given).
#   Exits the whole program silently if pppd already appears to be
#   running and -p was not given.
sub download_remote {
    if ( !$opt_p ) {
        if ( -e '/var/run/ppp0.pid' ) {
            # The user did not say the line was up (-p), yet pppd's pid
            # file exists: somebody else is using the line.  Exit
            # silently and do nothing at all.
            exit 0;
        }
        _run_quiet('/usr/bin/pon');

        # We need to sleep for a "while".  90 seconds?  30 not enough.
        sleep 90;
    }

    # Currently under ~/bin is rmmail3.pl.  Takes options of
    # h, c, b, f, t, l, P, I, F.  The last 3 (PIF) control whether it is
    # going to connect to the remote site as POP, IMAP or FTP.  The 'h'
    # option gets you the usage message.  The 't' option is for testing,
    # and opens a "rmmail.session.$month.$day" file.  The 'c' option
    # forces configuration.  Forwarding mail determined to be spam can
    # be kind of dangerous, so an option must be set to allow it.  The
    # same goes for bouncing spam.  The -l switch gets us a list of
    # remote folders under IMAP or FTP.  POP only deals with INBOX.

    # Rmmail keeps a message cache of downloaded messages, so we
    # won't be downloading the same message over and over.  It also
    # runs each message through a filter to split files into
    # different subfolders.

    # We want to use POP to handle the INBOX (less overhead) and FTP
    # to download monthly sent-mail-* files.  At the present time,
    # I'm going to ignore the current sent-mail file.  We don't want
    # bounce or forward, or list/help/configure/test.

    # Rmmail has commented out the delete on line 657, to delete remote
    # POP messages which have already been downloaded.  The delete at
    # line 736 to delete a remote message determined to be spam is in
    # effect, as is the delete at line 751 to delete "old" remote mail.

    # FTP does not delete files it downloads.  The code to delete files
    # after downloading, is at line 1302.  If FTP downloads a sent-mail
    # file, it just dumps the output into a sent-mail folder.

    # FTP gets file list by effectively `ls $f`.  I want $f=mail/sent-mail-*

    _run_quiet( $RMMAIL, '-P' );      # Deal with INBOX via POP
    # _run_quiet( $RMMAIL, '-F' );    # Deal with sent-mail-* via FTP
                                      # Currently processes all mail/*, not mail/sent-mail-*

    # And shutdown our connection.
    _run_quiet('/usr/bin/poff') if ( !$opt_p );
    return;
}

# shuffle_local()
#   Walks the download, mail and FTP work directories and hands each
#   folder to mergebox (or /bin/mv) as laid out in the comments below.
#   Relies on the file-level $date_m3w / $date_m6w cut-off timestamps.
#   NOTE(review): mergebox appears to take "destination source" and to
#   treat "-d DATE" as a cut-off for removing old mail from the source;
#   that is inferred from the comments here, not from mergebox itself.
sub shuffle_local {
    # First off, we have a number of inbox folders under ~/rmmail/folders,
    # and possibly INBOX and sent-mail* under ~/rmmail/FTP_WORK.  We
    # want to gather up all the inboxen and merge them into
    # ~/Archives/Mail/INBOX and move any sent-mail-* to ~/Archives/Mail
    # If we move any folders from .../folders/, we leave behind a mbox
    # with a single null message in it.

    # First, handle incoming mail

    # Grab the TOPIC $folder, and merge it into TOPIC folder in ~/Mail.
    # Delete stuff in $RMM_DIR/$folder that is older than 3 weeks.
    foreach my $folder ( _list_dir($RMM_DIR) ) {
        next if ( not_mail_folder($folder) );
        next if ( $folder eq 'INBOX' );    # Shouldn't be here, ever.

        if ( $folder =~ /^sent-mail-/i ) {
            _run_quiet( $MERGEBOX, "$MAILDIR/$folder", "$RMM_DIR/$folder" );
        } else {
            _run_quiet( $MERGEBOX, '-d', $date_m3w,
                "$MAILDIR/$folder", "$RMM_DIR/$folder" );
        }
        # write_init_mbox($folder);  # This overwrites mbox.
    }

    # Grab the TOPIC $folder, and merge it into INBOX folder in ~/Mail.
    # Delete stuff in $MAILDIR/$folder (TOPIC) that is older than 3 weeks.
    # We want to copy (merge) the TOPIC stuff into Archive TOPIC as well.
    foreach my $folder ( _list_dir($MAILDIR) ) {
        next if ( not_mail_folder($folder) );
        next if ( $folder eq 'INBOX' );

        # Outgoing and housekeeping folders never feed the INBOX.
        _run_quiet( $MERGEBOX, '-d', $date_m3w,
            "$MAILDIR/INBOX", "$MAILDIR/$folder" )
            unless ( ( $folder =~ /^sent-mail/i )
                  || ( $folder =~ /^outbox$/i )
                  || ( $folder =~ /^drafts$/i )
                  || ( $folder =~ /^trash$/i ) );

        _run_quiet( $MERGEBOX, "$AR_DIR/$folder", "$MAILDIR/$folder" );
    }

    # Merge our Mail copy of INBOX into archives version.  Delete all
    # INBOX stuff in Mail that is older than 6 weeks while doing this.
    if ( -e "$MAILDIR/INBOX" ) {
        _run_quiet( $MERGEBOX, '-d', $date_m6w,
            "$AR_DIR/INBOX", "$MAILDIR/INBOX" );
    }

    # Now handle outgoing mail (sent-mail)

    # Technically, rmmail should process all the FTP stuff into folders.
    # And hence, nothing here should get touched.  But, we'll see what
    # happens.
    foreach my $folder ( _list_dir($FTP_DIR) ) {
        next if ( not_mail_folder($folder) );

        if ( $folder eq 'INBOX' ) {
            # We probably never trip this.
            _run_quiet( $MERGEBOX, "$RMM_DIR/INBOX", "$FTP_DIR/$folder" );
            # write_init_mbox($folder);
        } elsif ( $folder =~ /^sent-mail-/i ) {
            if ( -e "$AR_DIR/$folder" ) {
                print STDERR "$AR_DIR/$folder already exists, skipping move from $FTP_DIR\n";
            } else {
                _run_quiet( '/bin/mv', "$FTP_DIR/$folder", "$AR_DIR/$folder" );
            }
        }
    }
    return;
}

# not_mail_folder( $fname )
#   Returns 1 if the directory entry $fname is something other than a
#   mail folder (dot file, editor backup or autosave), 0 otherwise.
sub not_mail_folder {
    my $fname = shift;

    # The '.' and '..' patterns covered by .*
    # return( 1 ) if( $fname eq '.' );
    # return( 1 ) if( $fname eq '..' );
    return 1 if ( $fname =~ /^\./ );       # Above, and kmail index files
    return 1 if ( $fname =~ /~$/ );        # Emacs backup
    return 1 if ( $fname =~ /\.bak$/ );    # Vi (and other) backup
    return 1 if ( $fname =~ /^\#/ );       # Emacs autosave
    return 0;
}