#!/usr/bin/perl
#
# weblogin-passcheck -- Check Apache access logs for disclosed passwords.
#
# Takes as input Apache access logs, possibly preceded by the filename and a
# colon.  Parse each line to see if it contains a login via GET, and if so,
# store the username, user agent, and date.
#
# Written by Russ Allbery <rra@stanford.edu>
# Copyright 2009 Board of Trustees, Leland Stanford Jr. University
#
# See LICENSE for licensing terms.

##############################################################################
# Modules and declarations
##############################################################################

use strict;

use Date::Parse qw(str2time);
use Getopt::Long qw(GetOptions);
use POSIX qw(strftime);

# Set to the local Kerberos realm.  This is set by the --realm command-line
# option and, if set, means that the password will be checked to see if it's
# still valid.
our $REALM;

# The path to a file listing the valid users.  Log entries referring to users
# who are not listed in this file will be ignored.  This is set by the --users
# command-line option.  The file can either be a simple list of usernames, one
# per line, or be in the UNIX password file format.
our $USERS;

##############################################################################
# Utility functions
##############################################################################

# Decode form parameters in a URL.  Separates parameters on & or ; and then
# decodes URL-encoded attribute values.  This is a simplified decoding routine
# that is just enough to handle what we need for this scan.
sub decode_params {
    my ($params) = @_;
    my @params = split (/[;&]/, $params);
    my %params = map {
        my ($k, $v) = split (/=/, $_, 2);
        $v =~ s/%(..)/chr (hex ($1))/ge;
        ($k, $v);
    } @params;
    return \%params;
}

# Given a principal and password, attempt authentication and return true if
# authentication succeeded, false otherwise.  Requires Authen::Krb5 and
# assumes that it's already been initialized.
sub password_validate {
    my ($user, $password) = @_;
    $user .= '@' . $REALM unless $user =~ /\@/;
    my $client = Authen::Krb5::parse_name ($user)
        or die "Internal error parsing Kerberos principal";
    my $cache = Authen::Krb5::cc_resolve ('MEMORY:weblogin')
        or die "Internal error creating temporary ticket cache";
    $cache->initialize ($client)
        or die "Internal error initializing temporary ticket cache";
    my $server = Authen::Krb5::build_principal_ext ($client)
        or die "Internal error creating server principal";
    my $status = Authen::Krb5::get_in_tkt_with_password ($client, $server,
                                                         $password, $cache);

    # I have no idea why get_in_tkt_with_password returns 1.  I think it has
    # something to do with writing to the ticket cache, but I can't figure out
    # what's causing it.  However, if authentication doesn't succeed, it
    # returns something other than 1, so we can detect success that way.
    if (defined ($status) && ($status == 0 || $status == 1)) {
        return 1;
    } else {
        return;
    }
}

##############################################################################
# Main routine
##############################################################################

# Parse command-line options.
my $help;
Getopt::Long::config ('bundling');
GetOptions ('h|help'    => \$help,
            'r|realm=s' => \$REALM,
            'u|users=s' => \$USERS) or exit 1;
if ($help) {
    print "Feeding myself to perldoc, please wait....\n";
    exec ('perldoc', '-t', $0);
}

# Parse the password file to get a list of valid users.
my %full;
if ($USERS) {
    open (PASSWD, '<', $USERS) or die "Cannot open $USERS: $!\n";
    while (<PASSWD>) {
        my ($user) = split ':';
        $full{$user} = 1;
    }
    close PASSWD;
}

# Initialize Kerberos if a realm was specified.
if ($REALM) {
    require Authen::Krb5;
    Authen::Krb5::init_context ();
}

# Parse the Apache log and accumulate user information in %users.  We also
# accumulate user agent information in %agents, although that information is
# not currently used.
my (%users, %agents);
while (<>) {
    s,^[\w./-]+:,,;
    my ($host, $date, $query, $status, $refer, $agent)
        = /^(?:[\w._-]+:)?([\d.]+)\ \S+\ \S+\ \[([^\]]+)\]\ \"([^\"]+)\"
            \ (\d+)\ \S+(?:\ \"([^\"]+)\"\ \"([^\"]+)\")?\s*$/x;
    next unless $host;
    next unless $query =~ m,^GET\s+/login(?:-spnego)?/?(\S+),;
    my $params = decode_params ($1);
    next unless ($params->{username} and $params->{password});
    next unless $params->{login};
    my $user = $params->{username};
    next if (%full and not $full{$user});
    $date =~ s/:/ /;
    $date = str2time ($date);
    $agents{$agent}++ if $agent;
    $users{$user}{count}++;
    if (not $users{$user}{date} or $users{$user}{date} < $date) {
        $users{$user}{date} = $date;
        $users{$user}{agent} = $agent;
        $users{$user}{host} = $host;
        $users{$user}{password} = $params->{password};
    }
    $users{$user}{notest} = 1 unless $params->{test_cookie};
}

# We now have all the data.  Report on the users.
my (%counts, %vcounts, $total, $atrisk, $notest);
for my $user (sort keys %users) {
    my $date = strftime ('%Y-%m-%d %T', localtime $users{$user}{date});
    my $count = $users{$user}{count};
    my $valid;
    if ($REALM) {
        $valid = password_validate ($user, $users{$user}{password});
    }
    printf "%-8s  %4d time%s  Last: %s %s (from %s)\n", $user, $count,
        ($count == 1 ? ' ' : 's'), ($valid ? 'GP' : '  '), $date,
        $users{$user}{host};
    $total++;
    $atrisk++ if $valid;
    if ($count > 5) {
        $counts{'>5'}++;
    } else {
        $counts{$count}++;
    }
    if ($valid) {
        if ($count > 5) {
            $vcounts{'>5'}++;
        } else {
            $vcounts{$count}++;
        }
    }
    $notest++ if $users{$user}{notest};
}

# Report the total number of users.
printf "\n%4d total users\n", $total;
if ($REALM) {
    printf "%4d users with still-valid passwords\n", $atrisk;
}

# Give a breakdown of users by the number of incidents.
print "\nBreakdown of all users:\n";
for my $count (sort keys %counts) {
    printf "%2s time%s  %4d users (%4.1f%%)\n", $count,
        ($count == 1 ? ' ' : 's'), $counts{$count},
        ($counts{$count} / $total * 100);
}

# If we were checking passwords, also break down the users with still-valid
# passwords based on the number of incidents.
if ($REALM) {
    print "\nBreakdown of users with still-good passwords:\n";
    for my $count (sort keys %vcounts) {
        printf "%2s time%s  %4d users (%4.1f%%)\n", $count,
            ($count == 1 ? ' ' : 's'), $vcounts{$count},
            ($vcounts{$count} / $total * 100);
    }
}

##############################################################################
# Documentation
##############################################################################

=head1 NAME

weblogin-passcheck - Check Apache access logs for disclosed WebLogin passwords

=head1 SYNOPSIS

B<weblogin-passcheck> [B<-h>] [B<-r> I<realm>] [B<-u> I<users>] I<log>
[I<log> ...]

=head1 REQUIREMENTS

Perl 5.6 or later and the Date::Parse Perl module, which is part of the
TimeDate distribution on CPAN.  To check whether the user's password is
still valid, the Authen::Krb5 module, also available from CPAN, is
required.

=head1 DESCRIPTION

Versions of the WebLogin script included in WebAuth releases 3.5.5 through
3.6.1 could potentially convert the user login via POST to a GET, thus
exposing the user's password in the URL and possibly to other web servers
via referrer.  This script scans Apache access logs (in the combined,
host_combined, or common log formats) for WebAuth logins via GET and
produces a report of affected users.

Optionally, B<weblogin-passcheck> can also filter out invalid usernames
(usually due to brute-force intrusion attacks) given a list of users via
the B<-u> option.  Also optionally, B<weblogin-passcheck> can check
whether the password logged is still valid if given a default Kerberos
realm to construct Kerberos principals via the B<-r> option.  This option
requires the Authen::Krb5 Perl module.

=head1 OPTIONS

=over 4

=item B<-h>, B<--help>

Print out this documentation (which is done simply by feeding the script
to C<perldoc -t>).

=item B<-r> I<realm>, B<--realm>=I<realm>

Check any detected passwords to see if they are still valid.  Entries for
users with a password that's still valid will have C<GP> in the report.
The I<realm> is used to create the Kerberos principal from the username if
the username does not contain an C<@>.

=item B<-u> I<users>, B<--users>=I<users>

If this option is given, the I<users> argument is the path to a file
containing a list of valid users and any user not listed in that file will
be filtered out of the report.  The file may either be one username per
line or in the UNIX password file format.

This option assumes that usernames in the WebLogin logs will be
unqualified.  If the WebLogin server is used to authenticate users from
multiple realms, the full principals as occur in the username form
parameter on the WebLogin server must be listed in this file.

=back

=head1 CAVEATS

This script assumes the recommended URL for the WebLogin login script and
requires that the logs be in one of the standard Apache log formats.

=head1 SEE ALSO

This script is is part of the WebAuth distribution, the current version of
which can be found at L<http://webauth.stanford.edu/>.

=head1 AUTHOR

Russ Allbery <rra@stanford.edu>

=cut
