#!/usr/bin/perl -w
use strict;

# OS X hack - tell CGI where to write uploaded temp files.  Otherwise,
# the uploaded file seems to get lost...
BEGIN {$ENV{TMPDIR} = '/var/tmp';}

use FindBin;
use lib "$FindBin::Bin/lib";

use HTML::TagCloud;
use HTML::Entities;

use Getopt::Long;

# Parse the notes file, locating tags at the end of entries and
# building up two data structures.
#
# Both of these structures collect "notes," references in %lines and
# the actual scalar in @all_notes, which contains a note ready for
# display in our HTML output.  First, these notes have had HTML
# elements encoded to simplify processing and make it harder to do
# nasty things to the user's browser.  Then the tags at the end of the
# lines have been turned into links, same as are used in the tag
# cloud, to enhance navigation.
#
# %lines
#  foo => [
#		   "note ref (tagged with foo)",
#		   "another note ref (tagged with foo)",
#			...
#		  ]
#
# @all_notes - arrary of the set of all notes refered to in %lines -
# in other words, every note found.	 Used in searching.

our $filename; # Name of the parsed notes file
our $cloud;
our %lines;
our @all_notes;


# Parse notes file or files.  Argument list is either a single file
# handle from a CGI upload, in which case it's a ref, or else a list
# of filenames provided via the command line --files option, in which
# case I need to open them and parse them.

sub parse_notes_file {
  # Reset on new file
  %lines = ();
  @all_notes = ();

  # URL used in constructing tag-links
  my $url = '?tag=';

  my $fh;
  foreach my $element (@_) {
	if (ref $element) {
	  $fh = $element;
	} else {
	  open ($fh, $element) or die "Can't open $element: $!\n";
	}

	local $/ = "\n\n";		  # Double-newline separates input records
	while (<$fh>) {
	  # Need a copy of the "note" to work on and refer to, and we need
	  # it with HTML chars like <, >, etc, escaped to "&lt;", "&gt;",
	  # etc.
	  my $this_line = HTML::Entities::encode($_);

	  # Pop words off the end of the note, processing them as tags as
	  # long as they start with "@".	Keep a list of these tags so that
	  # we can wrap them in href's when we're done picking them out.
	  my @tags;					# tags found at the end of this note
	  foreach my $tag ( reverse split ) {
		last unless $tag =~ s/^\@//; # Not a tag, bail
		push @tags, $tag;
		push (@{$lines{$tag}}, \$this_line);
	  }

	  foreach my $tag (@tags) {
		# Greedy match in $1 insures that $2 will be the last instance
		# of $tag in the note - in other words, the one on the end with
		# the "@" prefix.	 And we know that each $tag was parsed off the
		# end of this note, insuring this works.
		$this_line =~ s|(.*)\b($tag)\b|$1<a href="$url$2">$2</a>|s;
	  }

	  push @all_notes, $this_line;
	}
  }

  # Build tag cloud 
  $cloud = HTML::TagCloud->new(levels => 24);
  foreach my $tag (keys %lines) {
	$cloud->add($tag, $url.$tag, scalar @{$lines{$tag}});
  }
}

# Dirt-simple web server - displays the tag cloud, and the set of all
# notes that match a given tag, if provided.  Also accepts requests to
# search the notes, showing highlighted results.
{
  package MyWebServer;

  use HTTP::Server::Simple::CGI;
  use base qw(HTTP::Server::Simple::CGI);
 
  sub handle_request {
	my $self = shift;
	my $cgi	 = shift;
	return if !ref $cgi;

	# If you were given a file of notes to parse, then do so
	my $fh = $cgi->upload('upload');
	&main::parse_notes_file($fh) if $fh;

	# Print out the headers, html, the tag cloud, and the search form.
	$main::filename = $cgi->param('upload') if $cgi->param('upload');;
	my $title = 'Tag Cloud' . ($main::filename ? " for $main::filename" : '');
	print "HTTP/1.0 200 OK\r\n", $cgi->header, $cgi->start_html($title);

	# Only print the tag cloud, search box, and results if you've parsed a file
	if (scalar @all_notes) {

	  print $main::cloud->html_and_css();

	  print $cgi->start_form(),
		"<p align=\"right\"> Search all notes for: ",
		  $cgi->textfield('search_string'),
			$cgi->submit(-value => 'Search'), $cgi->end_form(),
			  "<br><i>(search is case-insensitive)</i><p>";

	  print "<hr>";

	  # Now do something interesting with your params, if any.

	  my $tag = $cgi->param('tag');
	  my $search_string = $cgi->param('search_string');

	  if ($search_string) {		# Display search results
		my $output;

		# Perform same HTML encoding on the search string that we did on
		# the notes, so that searching for things like "<" will work.
		$search_string = HTML::Entities::encode($search_string);

		print $cgi->h1("Notes that match \"$search_string\"");

		# A little ugly: We're going to grep thru @all_notes looking for
		# a match - but we need to strip the HTML markup (which we've
		# added to turn tags into links) out of the notes before checking
		# for a match, so that you don't match inside the HTML markup
		# while searching.  Also, you need to use a temp var, because
		# otherwise grep will modify $_.	Finally, use \Q (quotemeta) -
		# we don't want full patterns here, too much risk
		foreach (grep {my $t; ($t=$_) =~ s/<.*?>//g;
					   $t =~ /\Q$search_string/i}
				 @main::all_notes) {

		  # We want to highlight the match in yellow, but not change the
		  # saved copy of the note - so we work on a copy, $output.
		  #
		  # Regex to (roughly) match an HTML tag:	 <.*?>
		  #
		  # This s/// matches either an entire tag, or our search
		  # string.  The replacement bit is executed (/e): if $2 (our
		  # search string) has matched, wrap it in yellow.
		  # Otherwise, $1 (a tag) is what matched, and it gets
		  # replaced with itself.
		  #
		  # The /e is used instead of just saying "$1$2" (with $2 wrapped
		  # in yellow) because that produces endless warnings about use
		  # of undefined values - because only one of the two alternates
		  # is ever defined in the replacement bit.

		  ($output = $_) =~ s{(<.*?>)|($search_string)}
							 {
							   $2 ? "<b><FONT style=\"BACKGROUND-COLOR: yellow\">$2</FONT></b>" : $1}eig;
		  print $output, "<p>";
		}

	  } elsif ($tag) {			# Display notes that match "$tag"
		print $cgi->h1("Notes tagged with \"$tag\"");
		foreach my $ref (@{$main::lines{$tag}}) {
		  print $$ref, "<p>";
		}
	  }
	}

	# Always print out your upload field at the end of the page
	print $cgi->hr(), $cgi->start_multipart_form(),
	  "<p align=\"center\"> Create tag cloud from file: ",
		$cgi->filefield(-name => 'upload', -size => 60),
	  $cgi->br(), $cgi->br(), $cgi->submit(-label => 'Load File'),
		"</p>",
		$cgi->end_form();

	print $cgi->end_html;
  }
} # End of web server class

# Parse your command line options
sub Usage {
  "$0 [--port N] [--files filename [filename...]]\n";
}

my $port;
my @files;
die Usage() unless GetOptions("files=s{,}" => \@files, "port=s" => \$port);
$port ||= 8080;

# If you got file names on your command line, start with them.
if (scalar @files) {
  parse_notes_file(@files);
  $filename = join ", ", @files;
}

# Start an instance of MyWebServer on port $port, only bind to localhost
my $pid = MyWebServer->new($port);
$pid->host('localhost');

# Spawn a web browser, pointing to us.  Wait a sec for the server to get running.
if (not fork()) {
  sleep 2;

  # Try a couple of different ways to open a broswer pointed at us

  if ($^O =~ /darwin/) { # OS X
	system "open http://localhost:$port";
  } elsif ($^O =~ /linux/) {
	my $res = system "gnome-open http://localhost:$port";
	# $res is -1 if gnome-open not found, 256 on error, 0 on success.
	# In any case, guess you're running under kde and try again.
	if ($res) { 
	  system "kde-open http://localhost:$port";
	}
  }
  exit;
}

eval {
  $pid->run();
};

if ($@) { 
  die "Something else is already listening to port $port, pick another port to use\n" if $@ =~ /Address already in use/;

  die $@;
}

#            Copyright (c) 2008, Dan McDonald. All Rights Reserved.
#        This program is free software. It may be used, redistributed
#        and/or modified under the terms of the Perl Artistic License
#             (see http://www.perl.com/perl/misc/Artistic.html)

# Version: $Revision: 1.4 $
#
# Changelog:
#
# $Log: tagcloud.pl,v $
# Revision 1.4  2009/10/29 03:05:53  dan
# Switched to Getopt::Long.  Added --files option, changed
# parse_notes_file() to take either a filehandle or a list of files to
# parse.
#
# Revision 1.3  2009/10/21 15:23:23  dan
# Made port configurable, added command line switch processing, trap
# bind error and die with meaningful message.
#
# Revision 1.2  2009/10/21 01:58:28  dan
# Added "open" code for gnome and kde desktop environments, in addition
# to OS X.
#
# Revision 1.1  2009/10/21 01:37:21  dan
# Initial revision
#
