#! /usr/local/bin/perl
# Copyright  by Denys Duchier, Mar 1998, Universitt des Saarlandes
#
# analyze the frequency of instruction sequences
# instructions maybe normalized arbitrarily to allow the recognition
# of more general sequences. to plug in your own normalizer, you
# should write a perl file that contains a function (say foo) to be
# called for normalizing an instruction. the instruction will be
# available as $_ and foo should side-effect $_. the file must end
# with `1;' so that `require' may succeed. the file should also install
# foo as the normalizing function using `$normalize = \&foo;'.

local $normalize;
my ($option,$tmp,$cutoff,$width,$do_tables);

# default values

$tmp    = $ENV{'TMPDIR'} || "/tmp"; # tmp directory
$cutoff = 50;
$width  = 2;
$normalize = \&normalize_much;
$do_tables = 0;

select(STDERR); $|=1;
select(STDOUT); $|=1;

while ($ARGV[0] =~ /^-/) {
    $option = shift;
    if    ($option eq '--') { break; }
    elsif ($option eq '--help') { &help; }
    elsif ($option =~ /^--tmpdir=/) { $tmp = glob($'); }
    elsif ($option =~ /^--cutoff=/) {
	$cutoff = int($');
	die "illegal cutoff value: $cutoff" unless $cutoff > 0;
    }
    elsif ($option =~ /^--width=/ ) {
	$width = int($');
	die "illegal width value: $width" unless $width > 0;
    }
    elsif ($option =~ /^--load=/) { require $'; }
    elsif ($option eq '--tables') { $do_tables = 1; }
    else { die "unknown option: $option"; }
}

&do_tables if $do_tables;

sub help {
    print STDOUT <<EOF;
usage $0 [options] files...
    --help		print this information
    --tmpdir=DIR	use directory DIR for temporary files
    --cutoff=N		consider sequences that occur >= N times (default N=50)
    --width=N		consider sequences of N instructions (default N=2)
    --load=FILE		load FILE
    --tables		just analyze frequency of tables in callBI and testBI
EOF
    exit(0);
}

sub do_tables {
    my (%i,%o,%io);
    my ($i,$o,$io);
    my $n = 0;
    while (<>) {
	next if /\b[xy]\(\d\d\d/o;
	next unless /\b(call|test)BI\(\S+ (.*)\#(.*) /;
	$n++; $i=$2; $o=$3; $io="$2#$3";
	$i{$i} = 1 + int($i{$i});
	$o{$o} = 1 + int($o{$o});
	$io{$io} = 1 + int($io{$io});
    }
    print "TOTAL NUMBER OF TABLES: $n\n";
    &pp_table(\%i,"IN");
    &pp_table(\%o,"OUT");
    &pp_table(\%io,"IN#OUT");
    exit(0);
}

sub pp_table {
    my $table = shift;
    my $title = shift;
    print "\n$title TABLES\n";
    my @l = ();
    my ($k,$v);
    while (($k,$v) = each %$table) {
	next if $v < $cutoff;
	push @l,{k=>$k,v=>$v};
    }
    @l = sort { $b->{v} <=> $a->{v} } @l;
    my $x;
    foreach $x (@l) { print $x->{v},"\t",$x->{k},"\n"; }
}

my $n = $width;			# how many consecutive instructions
my $N = $n-2;
my $M = $n-1;
my $normalizer = $normalize;

# 1. normalize the input

sub normalize_much {
    s/\b(definition|endDefinition|putRecord|setConstant|callBI|testBI)\(.*\)/\1\(\)/;
    s/\bputConstant\(\S+\s/putConstant\(/;
    s/\b(switchOnTerm\(\S+).*\)/\1\)/;
    s/\bx\(\d+\)/x\(\)/g;
    s/\by\(\d+\)/y\(\)/g;
    s/\bg\(\d+\)/g\(\)/g;
}

print STDERR "Normalizing instructions...";
die "ouch: $!" unless open(O,">$tmp/$$.1");
my $label;
while (<>) {
    next if /^\%/o;
    next if /^\d+ \% words$/o;
    $label = '';
    chomp;
    if (/^lbl\(\d+\)\s+/) { $label = 'lbl:'; $_ = $'; }
    else { s/^\s+//; }
    &$normalizer;
    print O $label,$_,"\n";
}
close(O);
print STDERR "done\n";

# 2. produce all windows of width n

print STDERR "Computing windows...";
open(I,"<$tmp/$$.1"); unlink("$tmp/$$.1");
open(O,">$tmp/$$.2");
my @l = ();
my $i;
my $K = 0;
my $window;

while($i = <I>) {
    chomp($i);
    @l = ($i,@l[0..$N]);
    $K++;
    next unless $K>$M;
    $window = join(' ',@l);
    next if $window =~ /.\blbl:/; # label in middle
    $window = $' if $window =~ /^lbl:/;	# remove leading label
    print O $window,"\n";
}
close(I);
close(O);
print STDERR "done\n";

# 3. split into smaller files to facilitate sorting

print STDERR "Splitting...";
die "oops" if system("split $tmp/$$.2 $tmp/$$.3.");
print STDERR "done\n";
unlink("$tmp/$$.2");

# 4. sort each one of the smaller files

print STDERR "Sorting";
my @files = glob("$tmp/$$.3.*");
my ($fi,$fo);
foreach $fi (@files) {
    print STDERR ".";
    die "weird" unless $fi =~ /\.3\./;
    $fo = "$`.4.$'";
    die "oops: $fi" if system("sort -T $tmp $fi > $fo");
    unlink($fi);
}
@files = ();
print STDERR "done\n";

# 5. merge the smaller files

print STDERR "Merging...";
@files = glob("$tmp/$$.4.*");
my $files = join(' ',@files);
die "oops" if system("sort -T $tmp -m $files > $tmp/$$.5");
unlink @files;
@files = ();
print STDERR "done\n";

# 6. sort the windows and produce a count

print STDERR "Counting...";
open(I,"<$tmp/$$.5"); unlink("$tmp/$$.5");
open(O,">$tmp/$$.6");
my $prev = <I>;
my $curr;
my $count = 1;

while ($curr = <I>) {
    if ($curr eq $prev) {
	$count++;
	next;
    }
    goto loop if $count < $cutoff;
    print O "$count\t$curr";
  loop:
    $prev = $curr;
    $count = 1;
}

if ($count >= $cutoff) {
    print O "$count\t$curr";
}
close(I);
close(O);
print STDERR "done\n";

# 7. sort and print results

print STDERR "Sorting results...";
die "oops" if system("sort -nr $tmp/$$.6 > $tmp/$$.7");
unlink("$tmp/$$.6");
print STDERR "done\n\n";

die "oops" if system("cat $tmp/$$.7");
unlink("$tmp/$$.7");
