#!/usr/bin/perl -w

use strict;
use MT;
use MT::Author;
use MT::Entry;
use MT::Comment;
use MT::Trackback;
use MT::TBPing;

package MT;

# Backing store for the param() accessor below, keyed by parameter name.
my %param;

# Combined getter/setter for a named parameter.
#   param($app, $name)         returns the stored value (undef if never set)
#   param($app, $name, $value) stores $value under $name and returns it
# The invocant is accepted but not consulted; every caller shares %param.
sub param {
    my ($app, $key, @rest) = @_;
    return $param{$key} unless @rest;
    return $param{$key} = $rest[0];
}

package main;

use Getopt::Long;
use Pod::Usage;
# Both kinds of feedback are scanned unless switched off on the command
# line with --nocomments or --notrackbacks.
my $comments = 1;
my $trackbacks = 1;
GetOptions("comments!" => \$comments,
           "trackbacks!" => \$trackbacks,
           "help|?" => \my($help),
           "blog_id=i" => \my($blog_id),
           "offset=i" => \my($offset),
           "limit=i" => \my($limit),
           "config=s" => \my($cfg)) or pod2usage(2);
pod2usage(1) if $help;
# Disabling both kinds leaves nothing to scan; treat it as a usage error.
($comments || $trackbacks) or pod2usage(2);

# Stop with a readable message if MT cannot be constructed, rather than
# failing on the first method call made on an undefined $app.
my $app = MT->new($cfg ? ('Config' => $cfg) : ())
    or die "Unable to initialize Movable Type: "
        . (MT->errstr || 'unknown error') . "\n";

# Hand the scan settings to find_junk through the param() store; options
# that were not given (or are 0) are simply left unset.
$app->param('blog_id', $blog_id) if $blog_id;
$app->param('limit', $limit) if $limit;
$app->param('offset', $offset) if $offset;

# One pass per enabled feedback kind; '_type' tells find_junk which one.
if ($comments) {
    $app->param('_type', 'comment');
    find_junk($app);
}
if ($trackbacks) {
    $app->param('_type', 'tb');
    find_junk($app);
}

# NotJunkTest callback: runs MT::JunkFilter over the given object, then
# answers whether the object passed.
#   $cb  - first callback argument (unused here)
#   $obj - the object to rate; MT::JunkFilter->filter is applied to it
# Returns 0 when the object's is_junk equals 1 after filtering, 1 otherwise.
sub _cb_notjunktest_filter {
    my ($cb, $obj) = @_;

    # Loaded on demand, at the point the filter is first needed.
    require MT::JunkFilter;
    MT::JunkFilter->filter($obj);

    if ($obj->is_junk == 1) {
        return 0;
    }
    return 1;
}

# Scans stored feedback of the kind named by the '_type' param ('comment'
# or 'tb') and prints a report for every record the junk filter rejects.
#
# Settings are read from MT->instance->param: 'blog_id' (optional scope),
# 'offset' (records to skip, default 0), 'limit' (maximum records to
# process; unset or 0 means no cap) and '_type'.  The argument passed by
# the caller is not used.
#
# Only records whose junk_status is 0 are loaded, newest first.  Each one
# is cloned and the NotJunkTest callbacks are run against the clone; when
# they report failure the clone is handed to display_junk.  This sub never
# calls save itself.
#
# Returns nothing when '_type' is unset or not one of the two known kinds.
sub find_junk {
    my $app     = MT->instance;
    my $blog_id = $app->param('blog_id');
    my $offset  = $app->param('offset') || 0;
    my $limit   = $app->param('limit');
    my $type    = $app->param('_type');

    my %class_for = (comment => 'MT::Comment', tb => 'MT::TBPing');
    my %label_for = (comment => 'Comments',    tb => 'TrackBacks');
    return unless defined $type && $class_for{$type};

    my $class = $class_for{$type};
    my $iter = $class->load_iter({
        ($blog_id ? (blog_id => $blog_id) : ()),
        junk_status => 0 },
        {'sort' => 'created_on',
         'direction' => 'descend',
         offset => $offset });

    # find_junk runs once per feedback kind, so hook the filter in only on
    # the first call.  NOTE(review): presumably every call to
    # _register_core_callbacks queues one more handler, which would make
    # the filter run twice per record on the second pass - confirm against
    # MT.pm.
    our $notjunktest_registered;
    unless ($notjunktest_registered) {
        MT->_register_core_callbacks({NotJunkTest =>
                                      \&_cb_notjunktest_filter});
        $notjunktest_registered = 1;
    }

    print "Scanning " . $label_for{$type} . "...\n";
    my $count = 0;
    my $junk = 0;
    while (my $obj = $iter->()) {
        last if $limit && $count == $limit;
        $count++;

        # Test a copy so the loaded record itself is left as it was.
        my $subject = $obj->clone;
        next if MT->run_callbacks('NotJunkTest', $subject);

        # A false result may also carry an error message from a callback;
        # report it and clear it so it is not attributed to a later record.
        my $err = MT->errstr;
        if (defined $err && $err =~ m/\w/) {
            print STDERR "** error from callback: " . $err . "\n";
            MT->error(undef);
        }
        display_junk($subject);
        $junk++;
    }

    print "\nScanned: $count records\n";
    print "  Found: $junk junk items\n\n";
}

# Returns its argument, or the empty string when the argument is undefined,
# so optional fields can be concatenated without "uninitialized" warnings.
sub _junk_field {
    my ($value) = @_;
    return defined $value ? $value : '';
}

# Returns $text with trailing newlines removed and every line prefixed by
# $prefix.  An undefined $text is treated as the empty string.
sub _junk_indent {
    my ($text, $prefix) = @_;
    $text = _junk_field($text);
    # Trim before indenting: once the prefix has been inserted after a
    # trailing newline, the string no longer ends in "\n" and the trim
    # would leave a dangling, whitespace-only line behind.
    $text =~ s/\n+\z//;
    $text =~ s/\n/\n$prefix/g;
    return $prefix . $text;
}

# Prints a human-readable report for one junk comment or TrackBack ping.
#   $obj - an MT::Comment, or otherwise an object treated as a ping
#          (it must answer tb_id, blog_name, source_url, title, excerpt)
# The report goes to the currently selected output handle and shows the
# target entry or category, the junk log, the junk score and the record's
# content.  Missing related rows (entry, TrackBack, category, commenter)
# and undefined fields are reported as such instead of aborting the scan.
sub display_junk {
    my ($obj) = @_;

    my $entry;
    my $cat;
    my $type;
    if ($obj->isa('MT::Comment')) {
        $entry = MT::Entry->load($obj->entry_id);
        $type = 'comment';
    } else {
        $type = 'trackback';
        # The TrackBack row may be gone; only follow it when it loaded.
        my $tb = MT::Trackback->load($obj->tb_id);
        if (!$tb) {
            # nothing to resolve; reported as "(not found)" below
        } elsif ($tb->entry_id) {
            $entry = MT::Entry->load($tb->entry_id);
        } elsif ($tb->category_id) {
            # Not loaded at the top of the script, so pull it in here.
            require MT::Category;
            $cat = MT::Category->load($tb->category_id);
        }
    }

    print "Junk $type found -- ID " . $obj->id . "\n";
    print "\tTarget: ";
    if ($entry) {
        print "Entry \"". _junk_field($entry->title) . "\" (" . $entry->id . ")\n";
    } elsif ($cat) {
        print "Category \"" . _junk_field($cat->label) . "\" (" . $cat->id . ")\n";
    } else {
        # Always finish the line so the next label starts on its own row.
        print "(not found)\n";
    }
    print "\t   Log:\n";
    print _junk_indent($obj->junk_log, "\t\t") . "\n";
    print "\t Score: " . _junk_field($obj->junk_score) . "\n";
    print "\tContent:\n";
    if ($type eq 'comment') {
        my $authn = 'No';
        if (my $commenter_id = $obj->commenter_id) {
            my $commenter = MT::Author->load($commenter_id);
            $authn = 'Yes: ' . ($commenter
                ? _junk_field($commenter->name)
                : "(commenter #$commenter_id not found)");
        }
        print "\t\t   Name: " . _junk_field($obj->author) . "\n";
        print "\t\t    URL: " . _junk_field($obj->url) . "\n";
        print "\t\t E-mail: " . _junk_field($obj->email) . "\n";
        print "\t\t     IP: " . _junk_field($obj->ip) . "\n";
        print "\t\t  Authn: " . $authn . "\n";
        my $text = _junk_indent($obj->text, "\t\t\t");
        print "\t\t   Text:\n$text\n";
    } elsif ($type eq 'trackback') {
        print "\t\t   Blog: " . _junk_field($obj->blog_name) . "\n";
        print "\t\t    URL: " . _junk_field($obj->source_url) . "\n";
        print "\t\t E-mail: " . _junk_field($obj->email) . "\n";
        print "\t\t     IP: " . _junk_field($obj->ip) . "\n";
        print "\t\t  Title: " . _junk_field($obj->title) . "\n";
        my $text = _junk_indent($obj->excerpt, "\t\t\t");
        print "\t\tExcerpt:\n$text\n";
    }
    print "\n";
}

__END__

=head1 NAME

find-junk - report Movable Type comments and TrackBack pings flagged by the junk filter

=head1 SYNOPSIS

    find-junk
        --nocomments       Prevents filtering comments
        --notrackbacks     Prevents filtering trackback pings
        --blog_id <id>     Limit scan to a particular blog
        --offset <n>       Specify number of records to skip
        --limit <n>        Specify number of records to process
        --config <cfg>     Specify MT configuration path and file
        --help | -?        Usage information

By default, both comments and trackbacks are scanned. Records that a
user has explicitly marked as "not junk" or "is junk" are always
skipped.
