#!/usr/bin/perl -w

##############################################################################
#
# Print billing management system - stats analysis tool, version 4.2.0
#
# Copyright (C) 2002, 2003 Daniel Franklin
#
# This program is distributed under the terms of the GNU General Public
# License Version 2.
#
# This script should be called from a cron-job running as lpd user. Various
# PNG files are written out, plus textual stats information is dumped to
# stdout.
#
##############################################################################

use strict;
use Printbill::printbill_pcfg;
use Getopt::Long;
use Locale::gettext;
use POSIX;
use Printbill::PTDB_File;

# Select the message catalogue used by every gettext () call below.

setlocale (LC_MESSAGES, "");
textdomain ("printbill");

# This script should be called from a cron-job running as lpd user. It
# writes one PNG file per graph and prints the statistics to stdout.

my $config = '/etc/printbill/printbillrc';
my %params = pcfg ($config);

# Command-line options, pre-loaded with their defaults. Options without an
# explicit destination in the GetOptions call (help, web) land in %opt.

my %opt;
my $printer = "lp";
my $fraction = 1;
my $nslices = 20;
my $path = ".";
my ($png_url, $web);

# File-scope working variables shared by the sections that follow.

my (@stats, @dist, $i, $now);
my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst);
my (@weekly, @monthly, @annually, @overall);
my (@ptime, @ctime, @filesize, @pages, @cyan, @magenta, @yellow, @black, @time_per_page);
my (@cyan_coverage_per_page, @magenta_coverage_per_page, @yellow_coverage_per_page, @black_coverage_per_page);
my (%printerhash, $colourspace);

# Getopt::Long spells its configuration routine differently depending on
# the version of perl in use.

if ($] >= 5.005) {
	Getopt::Long::Configure ("pass_through", "bundling");
} else {
	Getopt::Long::config ("pass_through", "bundling");
}

GetOptions (
	\%opt,
	"fraction=f" => \$fraction,
	"printer=s" => \$printer,
	"help!",
	"slices=i" => \$nslices,
	"path=s" => \$path,
	"png_url=s" => \$png_url,
	"web!",
);

if ($opt{help}) {
	my $usage = gettext ("\n\n%s - generate stats and graphs for specified printer. Options:

	--printer <printer>
		generate stats for printer <printer>

	--path <path>
		writes PNG files out to path <path>
			
	--fraction <fraction>
		only consider the <fraction> smallest values (e.g. .95)

	--slices <slices>
		use <slices> bins in frequency histograms

	--web --png_url <url>

		generates html instead of plain text, also <url> should be
		the same location as that specified for --path, but
		expressed relative to the document root of the web server
		(e.g., if --path is /var/www/printbill, --png_url should be
		/printbill)

	--help
		tells you this

");

	printf ($usage, $0);
	exit;
}

# Look up the printer's colourspace in its database entry. The plotting and
# reporting sections use it to decide which colour channels to show (CMY
# graphs are only produced for colour printers).

tie %printerhash, "Printbill::PTDB_File", "$params{'db_home'}/printers/$printer.db", "TRUE"
	or die "$0: cannot open file $params{'db_home'}/printers/$printer.db: $!\n";

$colourspace = $printerhash{"colourspace"};

untie %printerhash;

# Read stats file

$now = time;

my $stats_file = "$params{'stats_path'}/printbill_stats_$printer.dat";
my $seconds_per_day = 86400;

open STATS, $stats_file
	or die sprintf (gettext ("%s: Error: cannot open %s for reading: %s.\n"), $0, $stats_file, $!);

# One pages-printed total per hour of the day (0-23) for each averaging
# window.

@weekly = (0) x 24;
@monthly = (0) x 24;
@annually = (0) x 24;
@overall = (0) x 24;

# Each row holds 11 tab-separated fields: field 0 is the job's epoch
# timestamp, fields 1+2 are summed into the parent processing time, fields
# 3+4 into the child processing time, field 5 is the file size, field 6 the
# page count and fields 7-10 the cyan, magenta, yellow and black coverage.

while (<STATS>) {
	chomp;
	@stats = split ("\t");

	# The message has two placeholders: program name and stats file name.

	die sprintf (gettext ("%s: Error: must have exactly 10 entries in each row of the stats file %s.
This means you are using an obsolete data file and you should remove it.\n"), $0, $stats_file) if ($#stats != 10);

	# Only the hour of day is needed to pick the histogram slot.

	$hour = (localtime ($stats[0]))[2];

	my $age = $now - $stats[0];

# These stats occurred within the last week

	if ($age <= 7 * $seconds_per_day) {
		$weekly[$hour] += $stats[6];
	}

# These stats occurred within the last 30 days

	if ($age <= 30 * $seconds_per_day) {
		$monthly[$hour] += $stats[6];
	}

# These stats occurred within the last year (365.25 days)

	if ($age <= 365.25 * $seconds_per_day) {
		$annually[$hour] += $stats[6];
	}

	$overall[$hour] += $stats[6];

	# Per-job samples, kept in file order so that the same index refers to
	# the same job in every array.

	push (@ptime, $stats[1] + $stats[2]);
	push (@ctime, $stats[3] + $stats[4]);
	push (@filesize, $stats[5]);
	push (@pages, $stats[6]);
	push (@cyan, $stats[7]);
	push (@magenta, $stats[8]);
	push (@yellow, $stats[9]);
	push (@black, $stats[10]);
}

close STATS;


# Plot various load-over-time distributions: pages printed per hour of the
# day, one graph per averaging window.

foreach my $load (
	[ "weekly", \@weekly ],
	[ "monthly", \@monthly ],
	[ "annually", \@annually ],
	[ "overall", \@overall ],
) {
	my ($suffix, $hourly) = @$load;

	plot ("$path/$printer" . "_$suffix.png", gettext ("Hour"), gettext ("Pages Printed"), "[0:24]", @$hourly);
}

# Plot various statistical distributions. Each entry is
# [file suffix, x-axis label, samples]; the colour channels are appended
# only when the printer's colourspace calls for them.

my $plot_cmy = ($colourspace ne "mono" && $colourspace ne "pagecount");
my $plot_black = ($colourspace ne "cmy" && $colourspace ne "pagecount");

my @histograms = (
	[ "ptime", gettext ("Parent Processing Time"), \@ptime ],
	[ "ctime", gettext ("Child Processing Time"), \@ctime ],
	[ "filesize", gettext ("File Size"), \@filesize ],
	[ "pages", gettext ("Pages"), \@pages ],
);

if ($plot_cmy) {
	# Average coverage per page for every job, including the last one
	# (indices run from 0 to $#pages inclusive). Jobs with a page count of
	# zero are recorded as 0 rather than dividing by zero.

	@cyan_coverage_per_page = map { $pages[$_] ? $cyan[$_] / $pages[$_] : 0 } (0 .. $#pages);
	@magenta_coverage_per_page = map { $pages[$_] ? $magenta[$_] / $pages[$_] : 0 } (0 .. $#pages);
	@yellow_coverage_per_page = map { $pages[$_] ? $yellow[$_] / $pages[$_] : 0 } (0 .. $#pages);

	push (@histograms,
		[ "cyan", gettext ("Total Cyan % Coverage"), \@cyan ],
		[ "magenta", gettext ("Total Magenta % Coverage"), \@magenta ],
		[ "yellow", gettext ("Total Yellow % Coverage"), \@yellow ],
		[ "cyan_per_page", gettext ("Average % Cyan Coverage Per Page"), \@cyan_coverage_per_page ],
		[ "magenta_per_page", gettext ("Average \% Magenta Coverage Per Page"), \@magenta_coverage_per_page ],
		[ "yellow_per_page", gettext ("Average \% Yellow Coverage Per Page"), \@yellow_coverage_per_page ],
	);
}

if ($plot_black) {
	@black_coverage_per_page = map { $pages[$_] ? $black[$_] / $pages[$_] : 0 } (0 .. $#pages);

	push (@histograms,
		[ "black", gettext ("Total Black \% Coverage"), \@black ],
		[ "black_per_page", gettext ("Average \% Black Coverage Per Page"), \@black_coverage_per_page ],
	);
}

foreach my $histogram (@histograms) {
	my ($suffix, $xlabel, $samples) = @$histogram;

	@dist = hist ($nslices, $fraction, @$samples);
	plot ("$path/$printer" . "_$suffix.png", $xlabel, gettext ("Frequency"), "[:]", @dist);
}

# Print the report, either as an HTML fragment (--web) or as plain text.
# Every row is described once and its graph file name is built from a single
# suffix, so a link's target and its visible text cannot disagree.

my $report_cmy = ($colourspace ne "mono" && $colourspace ne "pagecount");
my $report_black = ($colourspace ne "cmy" && $colourspace ne "pagecount");

if ($opt{'web'}) {
	if (!defined ($png_url)) {
		print gettext ("<p>You must specify --png_url <url> if you specify --web\n</p>");
		exit 0;
	}

	print sprintf (gettext ("<h1>Detailed usage information for printer \"%s\"</h1>\n"), $printer);

	print gettext ("<h3>Printing load over time</h3>");

	print "<table cellpadding=2 cellspacing=2 border=1>\n";
	print "<thead>\n";

	foreach my $heading (gettext ("Averaged Over"), gettext ("Graph")) {
		print "<td><b>", $heading, "</b></td>\n";
	}

	print "</thead>\n<tbody>\n";

	# [row label, graph file suffix]

	foreach my $row (
		[ gettext ("Last 7 Days"), "weekly" ],
		[ gettext ("Last 30 Days"), "monthly" ],
		[ gettext ("Last 365.25 Days"), "annually" ],
		[ gettext ("Forever"), "overall" ],
	) {
		my ($label, $suffix) = @$row;
		my $png = $printer . "_$suffix.png";

		print "<tr>\n<td>", $label, "</td>\n";
		print "<td><a href=\"$png_url/$png\">$png</a></td>\n";
	}

	print "</tbody></table>\n";

	print "<h3>", gettext ("Statistics"), "</h3>\n";

	print "<table cellpadding=2 cellspacing=2 border=1>\n";
	print "<thead>\n";

	foreach my $heading (
		gettext ("Parameter"),
		gettext ("Graph"),
		gettext ("Minimum"),
		gettext ("Maximum"),
		gettext ("Mean"),
		gettext ("Standard Deviation"),
		gettext ("Median"),
	) {
		print "<td><b>", $heading, "</b></td>\n";
	}

	print "</thead>\n<tbody>\n";

	# [row label, graph file suffix, samples]

	my @web_rows = (
		[ gettext ("Parent Time"), "ptime", \@ptime ],
		[ gettext ("Child Time"), "ctime", \@ctime ],
		[ gettext ("File Size"), "filesize", \@filesize ],
		[ gettext ("Pages"), "pages", \@pages ],
	);

	if ($report_cmy) {
		push (@web_rows,
			[ gettext ("Total Cyan"), "cyan", \@cyan ],
			[ gettext ("Total Magenta"), "magenta", \@magenta ],
			[ gettext ("Total Yellow"), "yellow", \@yellow ],
			[ gettext ("Cyan/Page"), "cyan_per_page", \@cyan_coverage_per_page ],
			[ gettext ("Magenta/Page"), "magenta_per_page", \@magenta_coverage_per_page ],
			[ gettext ("Yellow/Page"), "yellow_per_page", \@yellow_coverage_per_page ],
		);
	}

	if ($report_black) {
		push (@web_rows,
			[ gettext ("Total Black"), "black", \@black ],
			[ gettext ("Black/Page"), "black_per_page", \@black_coverage_per_page ],
		);
	}

	foreach my $row (@web_rows) {
		my ($label, $suffix, $samples) = @$row;
		my $png = $printer . "_$suffix.png";

		# Compute the figures before emitting any of the row's markup.

		@stats = calcstats (@$samples);

		print "<tr>\n<td>", $label, "</td>\n";
		print "<td><a href=\"$png_url/$png\">$png</a></td>\n";
		webprint (@stats);
	}

	print "</tbody></table><p>\n";
} else {
	print gettext ("Statistical parameters (minimum, maximum, mean, standard deviation and median)\n");
	print "==============================================================================\n";

	# [row label, samples]

	my @text_rows = (
		[ gettext ("Parent Processing Time"), \@ptime ],
		[ gettext ("Child Processing Time"), \@ctime ],
		[ gettext ("File Size"), \@filesize ],
		[ gettext ("Pages"), \@pages ],
	);

	if ($report_cmy) {
		push (@text_rows,
			[ gettext ("Cyan"), \@cyan ],
			[ gettext ("Magenta"), \@magenta ],
			[ gettext ("Yellow"), \@yellow ],
			[ gettext ("Cyan Coverage Per Page"), \@cyan_coverage_per_page ],
			[ gettext ("Magenta Coverage Per Page"), \@magenta_coverage_per_page ],
			[ gettext ("Yellow Coverage Per Page"), \@yellow_coverage_per_page ],
		);
	}

	if ($report_black) {
		push (@text_rows,
			[ gettext ("Black"), \@black ],
			[ gettext ("Black Coverage Per Page"), \@black_coverage_per_page ],
		);
	}

	foreach my $row (@text_rows) {
		my ($label, $samples) = @$row;

		@stats = calcstats (@$samples);
		print $label, ": @stats\n";
	}
}

# Build a frequency histogram of a list of numbers, ready to feed to plot ().
#
# Arguments:
#   $nslices  - number of equal-width bins to divide the range into
#   $fraction - the samples are walked in ascending order while keeping a
#               running sum; the sample at which that sum first exceeds
#               $fraction of the grand total (or the largest sample, if it
#               never does) becomes the upper bound of the histogram
#   @data     - the samples
#
# Returns one "x<TAB>count" string per bin, where x is the centre of the bin.
# Returns an empty list when there are no samples, when $nslices is less
# than 1, or when the lower and upper bounds coincide.

sub hist {
	my ($nslices, $fraction, @data) = @_;
	my ($val, $min, $max, @bins, $idx, $delta, $i, $step, $start, $x, @graphdata, $total, $cumulative, @sdata);

	# Nothing to bin, or nothing to bin it into.

	return () if (!@data || $nslices < 1);

	for ($i = 0; $i < $nslices; $i++) {
		$bins[$i] = 0;
	}

	@sdata = sort {$a <=> $b} @data;

	$min = $sdata[0];

	$total = 0;

	foreach $val (@sdata) {
		$total += $val;
	}

	# Find the upper bound: the sample at which the running sum passes the
	# requested fraction of the total.

	$cumulative = 0;
	$max = $min;

	foreach $val (@sdata) {
		$cumulative += $val;
		$max = $val;

		last if ($cumulative > ($total * $fraction));
	}

	$delta = ($max - $min);

	return () if ($delta == 0);

	foreach $val (@sdata) {
		# Samples are sorted, so everything from here on is out of range.

		last if ($val > $max);

		# A sample equal to the upper bound maps to index $nslices; it
		# belongs in the last bin.

		$idx = int ($nslices * (($val - $min) / $delta));
		$idx = $nslices - 1 if ($idx >= $nslices);
		$bins[$idx]++;
	}

	$i = 0;
	$step = $delta / $nslices;
	$start = $min + $step / 2;

	foreach (@bins) {
		$x = $start + $step * $i;
		$graphdata[$i] = "$x\t$_";
		$i++;
	}

	return @graphdata;
}

# Draw one box graph into a PNG file by piping commands and inline data to
# the gnuplot program named by $params{'gnuplot'}.
#
# Arguments:
#   $filename  - PNG file to write (must not be empty)
#   $xlabel    - x-axis label
#   $ylabel    - y-axis label
#   $xrange    - gnuplot range expression, e.g. "[0:24]" or "[:]"
#   @graphdata - data lines, sent to gnuplot verbatim, one per line
#
# Dies when no file name is given. A gnuplot that cannot be started, or that
# exits unsuccessfully, only produces a warning so that the rest of the
# report is still generated.

sub plot {
	my ($filename, $xlabel, $ylabel, $xrange, @graphdata) = @_;

	die "$0: plot: no output file name given\n" if ($filename eq "");

	if (!open (GP, "|$params{'gnuplot'}")) {
		warn "$0: cannot run $params{'gnuplot'}: $!\n";
		return;
	}

	print GP "set xlabel \"$xlabel\"\n";
	print GP "set ylabel \"$ylabel\"\n";
	print GP "set xrange $xrange\n";
	print GP "set grid\n";
	print GP "set nokey\n";
	print GP "set term png colour\n";
	print GP "set output \"$filename\"\n";

	# "-" makes gnuplot read the data points from the command stream.

	print GP "plot \"-\" with boxes\n";

	foreach (@graphdata) {
		print GP "$_\n";
	}

	# Closing a pipe waits for the child; a false result means either an
	# I/O error ($! set) or a non-zero exit status (reported in $?).

	if (!close (GP)) {
		if ($!) {
			warn "$0: error closing pipe to $params{'gnuplot'} for $filename: $!\n";
		} else {
			warn "$0: $params{'gnuplot'} exited with status $? while writing $filename\n";
		}
	}
}

# Summarise a list of numbers.
#
# Arguments: the samples, in any order.
#
# Returns the list (minimum, maximum, mean, standard deviation, median), in
# the same order as the column headings of the report, or an empty list
# when no samples are given.
#
# The standard deviation is the population form, sqrt (sum ((x - mean)^2) / n).
# The median is the middle sample of the sorted data, or the mean of the two
# middle samples when there is an even number of them.

sub calcstats {
	my @data = sort {$a <=> $b} @_;
	my $count = scalar (@data);
	my ($total, $mean, $stddev, $median, $middle, $tmp);

	# No samples: nothing meaningful to report (and nothing to divide by).

	return () if ($count == 0);

	$total = 0;

	foreach (@data) {
		$total += $_;
	}

	$mean = $total / $count;

	# Sum of squared deviations from the mean.

	$total = 0;

	foreach (@data) {
		$tmp = $mean - $_;
		$total += $tmp * $tmp;
	}

	$stddev = sqrt ($total / $count);

	$middle = int ($count / 2);

	if ($count % 2) {
		$median = $data[$middle];
	} else {
		$median = ($data[$middle - 1] + $data[$middle]) / 2;
	}

	return ($data[0], $data[$#data], $mean, $stddev, $median);
}

# Finish the current HTML table row: print each argument in its own <td>
# cell, in the order given, and then close the row.

sub webprint {
	my @cells = map { "<td>$_</td>\n" } @_;

	print join ("", @cells) . "</tr>\n";
}
