#!/usr/bin/python

# todo host aliases missing

import logging
import os
import sys
import email
import copy
import string
from optparse import OptionParser
from email.Generator import Generator
import gdbm
import mailbox

# Header fields whose values take part in matching (see flatten_mail_header).
wanted_header_fields = set([
    'Content-Type',
    'MIME-Version',
    'Content-Transfer-Encoding',
    'Subject',
    'From',
    'X-Mailer',
])

# Header fields written by this filter itself; they are stripped from
# incoming mail (see clean_mail_header).
unwanted_header_fields = set([
    'X-deja-vu-digest',
    'X-deja-vu',
    'X-deja-vu-line',
])

# Description handed to OptionParser.  Written as explicit literals so the
# exact line breaks and the trailing blank after "will" stay visible.
desc = (
    "%prog is a filter for cron generated mail.\n"
    "\n"
    "it expects mails which only differ in small amounts.\n"
    "Matching is done, by matching line by line against already learned mails.\n"
    "\n"
    "If a mail is matched, the header\n"
    "X-deja-vu is set to yes.\n"
    "Otherwise the header is set to, no and if specified the header X-deja-vu-line will \n"
    "contain lines of the mail which did not match.\n"
    "\n"
)

def clean_mail_header(msg):
    """Strip this filter's own result headers from msg, in place.

    Every header named in unwanted_header_fields is deleted, so an
    X-deja-vu* header already present on the incoming mail does not
    survive into the output next to the freshly written verdict.

    The presence test is `key in msg`, which compares header names
    case-insensitively; a test against msg.keys() would only find the
    exact spelling and let e.g. "X-Deja-Vu" slip through.

    msg -- a message object from the email package; it is modified.
    Returns the same msg object.
    """
    for key in unwanted_header_fields:
        if key in msg:
            logging.debug("remove header field from mail: %s", key)
            # Deleting by name drops every occurrence of the field.
            del msg[key]
    return msg

def clean_mail(msg):
    """Return a cleaned deep copy of msg.

    The caller's message object is not touched: the headers are stripped
    (via clean_mail_header) from a deep copy, and that copy is returned.
    """
    return clean_mail_header(copy.deepcopy(msg))

def unify_string(s):
    """Return s with every ASCII digit (0-9) replaced by the letter 'x'.

    All other characters are left unchanged, so the result has the same
    length and the same line breaks as the input.  Presumably this makes
    lines that differ only in numbers compare equal.

    Implemented with str.replace instead of the deprecated module-level
    string.translate/string.maketrans pair, so no translation table is
    rebuilt on every call and unicode text is handled as well.

    s -- the text to normalise.
    """
    for digit in string.digits:
        s = s.replace(digit, 'x')
    return s

def flatten_mail_header(msg):
    """Concatenate the values of the wanted header fields of msg.

    A field from wanted_header_fields contributes only if its name occurs
    in msg.keys() with exactly that spelling.  The values are joined
    without any separator, in the iteration order of the
    wanted_header_fields set.

    Returns the concatenated string, or "" if no wanted field is present.
    """
    present = msg.keys()
    values = [msg[name] for name in wanted_header_fields if name in present]
    return "".join(values)

def flatten_mail_body(msg,recurse=0):
    """Return the payload text of msg, with all nested parts concatenated.

    A non-multipart message yields its payload exactly as get_payload()
    returns it (no transfer decoding is requested).  A multipart message
    yields the flattened bodies of its parts, joined in order without a
    separator.

    msg     -- message object from the email package.
    recurse -- current nesting depth; callers normally leave the default.
    """
    # TODO error handling
    # Failsafe: anything nested deeper than 10 levels contributes nothing.
    if recurse > 10:
        return ""
    if not msg.is_multipart():
        return msg.get_payload()
    parts = [flatten_mail_body(part, recurse + 1) for part in msg.get_payload()]
    return "".join(parts)

def flatten_mail(msg):
    """Return the comparable text form of msg.

    The flattened body comes first, directly followed by the flattened
    wanted headers; the combined text is then run through unify_string.
    msg itself is not modified here.
    """
    body_text = flatten_mail_body(msg)
    header_text = flatten_mail_header(msg)
    return unify_string(body_text + header_text)

parser = OptionParser(usage="%prog <options>", version="%prog 0.1",description=desc)
parser.add_option("-m", "--mail", dest="mail_filename",metavar="<filename of mail>",
                  help="test against mail from filename")
parser.add_option("-a", "--add", dest="add",metavar="<Maildir>",
                  help="build database from maildir")
parser.add_option("-s", "--show", dest="show_header_lines",metavar="<number of lines>",type="int",
                  help="show first <num> of not matched lines in header")
parser.add_option("-d", "--debug", dest="debug",action="store_true",
                  help="show debug output")
(options, args) = parser.parse_args()

db_filename = os.getenv('HOME')+'/.deja-vu.dbm'

if options.debug:
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)
    

if (options.add):
    db = gdbm.open(db_filename,'n')
    for message in mailbox.Maildir(options.add,factory=None):
        logging.debug("process mail msgid: " + message['Message-Id'])
        for line in flatten_mail(clean_mail(message)).splitlines():
            db[line] = '1'
    db.close
else:
    if options.mail_filename:
        f = open(options.mail_filename, "r")
        message = email.message_from_file(f)
        f.close()
    else:
        message = email.message_from_file(sys.stdin)
    message_out = clean_mail_header(message)
    db = gdbm.open(db_filename,'r')

    match = True
    for line in flatten_mail(clean_mail(message)).splitlines():
        if not line in db:
            logging.debug("not match:" + line)
            if options.show_header_lines:
                message_out['X-deja-vu-line'] = line
                options.show_header_lines = options.show_header_lines - 1
            match = False
        else:
            logging.debug("    match:" + line)

    if match:
        message_out['X-deja-vu'] = 'yes'
    else:
        message_out['X-deja-vu'] = 'no'

    logging.debug("------------ OUTPUT MAIL START ------------------")
    print message_out.as_string(False),
    logging.debug("------------ OUTPUT MAIL END ------------------")
    db.close

# vim:set et:
# vim:set ts=4:
# vim:set shiftwidth=4:
