#!/usr/bin/perl
#
# Redirect script for BannerFilter 1.31
# http://phroggy.com/bannerfilter/
# 
# This is the main redirector script that Squid will use to
# block those pesky banners.  Add a line like this to squid.conf:
# redirect_program /path/to/redirector.pl
#
use strict;
use warnings;
use File::Basename;
use File::Spec;
use Getopt::Std;

# Build a list of places to look for the config file
my $config='bannerfilter.conf'; # Filename to look for
my @search=();
foreach my $dir (qw(/usr/local/etc /opt/etc /etc /usr/local /opt /Library C:)) {
  next unless(-d $dir);
  foreach my $subdir ('','/bannerfilter','/squid','/squid/bannerfilter','/squid/etc') {
    push @search,File::Spec->catfile($dir.$subdir,$config);
  }
}
push @search,File::Spec->catfile(dirname($0),$config); # same dir as this script

# Handle command-line options
$Getopt::Std::STANDARD_HELP_VERSION=1;
my %Options=();
unless(getopts('c:hv',\%Options)) {
  HELP_MESSAGE();
  exit(1);
}
if(exists $Options{'c'}) { # "-c /path/to/bannerfilter.conf" overrides search list 
  @search=($Options{'c'})
}
if(exists $Options{'h'}) { # getopts() also handles --help
  VERSION_MESSAGE();
  HELP_MESSAGE();
  exit;
}
if(exists $Options{'v'}) { # getopts() also handles --version
  VERSION_MESSAGE();
  exit;
}
# in future, add other options here as needed
if(@ARGV) { # error if other stuff is present on command line
  HELP_MESSAGE();
  exit(1);
}

# Read the configuration file
our (%conf,$configpath);
ReadConf(@search);

# Load the data files
our (@banners,@popups,@frames,@exceptions);
ReadData();

# Trap for SIGHUP, and set a flag if received
our $reload=0;
$SIG{'HUP'}=sub {$reload=1};

# Don't try to buffer output
$|=1;

# Loop infinitely, getting URLs from Squid
while (<STDIN>) {
  my @input=split;
  my $url=shift @input;
  my($newurl,$log);

  # If SIGHUP was received
  ReadConf(@search) if($reload);
  ReadData() if($reload);

  unless($conf{'www'}) {
    print STDERR "ERROR:\n";
    print STDERR "WWW URL not specified in $configpath\n\n";
    print STDERR "Hint: edit $configpath\n";
    print STDERR "and set WWW to the URL of the directory containing the\n";
    print STDERR "included HTML and graphic files.\n";
  }
  my $WWW=$conf{'www'};
  $WWW.='/' unless($WWW=~/\/$/);
  if($url=~/^http:\/\/bannerfilter.internal\/(.*)$/) {
    $newurl=$WWW.$1;
    $log='http://bannerfilter.internal/';
  }
  unless($newurl) { 
    foreach my $x (@exceptions) {
      if(($x=~/(.*)\*(.*)/ ? (index($url,$1)+1 && index($url,$2)+1) : index($url,$x)+1)) {
        $newurl=$url;
        $log="exception $x";
        last;
      }
    }
  }
  unless($newurl) {
    foreach my $z (@frames) {
      my($x,$r)=split /\t/,$z;
      if(($x=~/(.*)\*(.*)/ ? (index($url,$1)+1 && index($url,$2)+1) : index($url,$x)+1)) {
        $newurl=$WWW.$r;
        $log="frame $x";
        last;
      }
    }
  }
  unless($newurl) {
    foreach my $x (@popups) {
      if(($x=~/(.*)\*(.*)/ ? (index($url,$1)+1 && index($url,$2)+1) : index($url,$x)+1)) {
        $newurl=$WWW.'popup.html';
        $log="popup $x";
        last;
      }
    }
  }
  unless($newurl) {
    foreach my $x (@banners) {
      if(($x=~/(.*)\*(.*)/ ? (index($url,$1)+1 && index($url,$2)+1) : index($url,$x)+1)) {
        $newurl=$WWW.$conf{'bannergif'};
        $log="banner $x";
        last;
      }
    }
  }
  LogBanner($url,$log) if($log);
  $newurl=$url unless($newurl);
  print "$newurl\n";
}
exit;

sub ReadData {
  (@banners,@popups,@frames,@exceptions)=();
  my $DATA=File::Spec->rel2abs(dirname($0));
  $DATA=File::Spec->canonpath($conf{'data'}) if($conf{'data'});
  unless(-d $DATA) {
    print STDERR "ERROR:\n";
    print STDERR "Data directory $DATA does not exist.\n\n";
    print STDERR "Hint: edit $configpath\n";
    print STDERR "and set DATA to the correct location of the directory containing the\n";
    print STDERR "data files, or move the data files to $DATA.\n";
    exit(1);
  }

  # Read the list of banners
  foreach my $file ('banners.data','banners.local.data') {
    open(FILE,File::Spec->catfile($DATA,$file)) or ErrorReadingDataFile(File::Spec->catfile($DATA,$file));
    while(<FILE>) {
      chomp;
      s/\r//g;
      push(@banners,$_) if($_ && !(/^#/ || /^\s*$/));
    }
    close(FILE);
  }

  # Read the list of popup windows
  foreach my $file ('popups.data','popups.local.data') {
    open(FILE,File::Spec->catfile($DATA,$file)) or ErrorReadingDataFile(File::Spec->catfile($DATA,$file));
    while(<FILE>) {
      chomp;
      s/\r//g;
      push(@popups,$_) if($_ && !(/^#/ || /^\s*$/));
    }
    close(FILE);
  }

  # Read the list of frame banners
  foreach my $file ('frames.data','frames.local.data') {
    open(FILE,File::Spec->catfile($DATA,$file)) or ErrorReadingDataFile(File::Spec->catfile($DATA,$file));
    while(<FILE>) {
      chomp;
      s/\r//g;
      push(@frames,"$1\t$2") if($_ && !(/^#/) && /^(\S+)\s+(\S+)$/);
    }
    close(FILE);
  }

  # Read the list of exceptions
  foreach my $file ('exceptions.data','exceptions.local.data') {
    open(FILE,File::Spec->catfile($DATA,$file)) or ErrorReadingDataFile(File::Spec->catfile($DATA,$file));
    while(<FILE>) {
      chomp;
      s/\r//g;
      push(@exceptions,$_) if($_ && !(/^#/ || /^\s*$/));
    }
    close(FILE);
  }

  # Clear the flag so we don't do this again next time
  $reload=0;
}

sub LogBanner {
  if($conf{'log'}) {
    open(LOG,">>$conf{'log'}");
    print LOG "$_[0] matches $_[1]\n";
    close LOG;
  }
}

sub ReadConf {
  # Arguments: list of paths (including filename) to search

  # Read the configuration file, and store values in %hash.
  # Format is "label = value" on each line; the label is case-insensitive
  # and stored in lower-case.  Any whitespace is allowed but not required
  # before and after the equals sign.  Comments and blank lines are
  # ignored.  Extra lines will be read but will be ignored by the script.

  %conf=();
  $configpath=undef;
  foreach my $config (@_) {
    if(-e $config) {
      $configpath=File::Spec->rel2abs($config);
      last;
    }
  }
  unless($configpath) {
    print STDERR "ERROR:\n";
    print STDERR "Can't find configuration file.\n\n";
    print STDERR "Hint: try putting bannerfilter.conf in the same place as this script,\n";
    print STDERR "or see the README file for other suggestions.\n";
    exit(1);
  }
  unless(open(CONF,$configpath)) {
    print STDERR "ERROR:\n";
    print STDERR "Can't open $configpath.\n\n";
    print STDERR "Hint: make sure permissions are set so the file is readable.\n";
    print STDERR "If this is not the correct location, try specifying the path\n";
    print STDERR "to the configuration file with the -c option.\n";
    exit(1);
  }
  my $line=0;
  while(<CONF>) {
    chomp;
    $line++;
    next unless($_);
    s/\r//g; # strip CR, needed for Windows
    next if(/^\s*#/ || /^\s*;/ || /^\s*$/);
    if(/^\s*([a-z]+)\s*?=\s*(\S*)\s*$/i) {
      $conf{lc($1)}=$2;
    } else {
      print STDERR "ERROR:\n";
      print STDERR "Syntax error in $configpath on line $line\n\n";
      print STDERR "Hint: edit the configuration file and fix the problem, or\n";
      print STDERR "specify a different configuration file to use.\n";
      exit(1);
    }
  }
  close(CONF);
}

sub HELP_MESSAGE {
  my $scriptname=basename($0);
  print "Syntax: $scriptname [-c /path/to/bannerfilter.conf]\n";
}

sub VERSION_MESSAGE {
  print "BannerFilter version 1.31\n";
}

sub ErrorReadingDataFile {
  my($filename)=@_;
  print STDERR "ERROR:\n";
  print STDERR "Can't open $filename.\n\n";
  print STDERR "Hint: if this is not the correct location to look for data files,\n";
  print STDERR "edit $configpath and make the necessary change.\n";
  print STDERR "If this is the correct location, make sure the directory is really\n";
  print STDERR "there, and make sure file permissions are set correctly.\n";
  exit(1);
}
