#!/usr/pkg/bin/perl
#
# 	$Id: newshound.pl,v 1.1 2002/03/24 13:10:18 proff Exp $	
#       $Log: newshound.pl,v $
#       Revision 1.1  2002/03/24 13:10:18  proff
#       add perl newshound
#
#  Revision 1.3  2001/12/30 09:05:30  jer
#  cleaning up variables and code
#
#

use Net::NNTP;
use Getopt::Std;
use IO::File;
use strict;
##
## defaults
##
# Global configuration table.  It is filled from the command line, from the
# configuration file (getconf) and finally from set_defaults; keys are
# upper-case keywords such as SERVER, PORT or CACHEDIR.
my %CONF;
# CVS revision keyword, printed by the settings report and the start banner.
my $VERSION = '$Revision: 1.1 $';
# Default location of the configuration file.  "||=" (logical default)
# replaces the original "|=" (bitwise string OR), which only produced the
# right value because the slot is still empty at this point.
$CONF{'CONF'} ||= "/usr/local/etc/nntpcache/newshound.conf";

# Print the command-line help text to STDOUT.
# The file names, server, port and interval shown are read from %CONF, so
# they reflect the configuration in effect when the sub is called.
# Returns 1.
sub usage
{
  # The program name is passed as an argument instead of being interpolated
  # into the format, so a '%' in the path is never taken as a conversion.
  printf("Usage: %s [options...]\n", $CONF{'PROG'});
  printf("-c Collect all newsgroups in %s file.\n", $CONF{'HISTORY'});
  printf("-f Fetch newsgroup messages from the %s file.\n", $CONF{'HOUND'});
  printf("-h Just grab article headers.\n");
  printf("-A Automatic mode. Collects news every %d minutes\n", $CONF{'MINS'});
  printf("-S Setting details for %s.\n",$CONF{'PROG'});
  printf("-b Remove all newsgroups with ban strings in them.\n");
  printf("-d nn Debug Mode level. 1 2 or 3\n");
  printf("-u [l][m][h]        Update %s index setting.\n", $CONF{'HOUND'});
  printf("-n [newsgroup]      Just update this newsgroup's article index.\n");
  printf("-l msg              Low mark on number of articles read.\n");
  printf("-g nn               Download top nn newsgroups from hound.list. 0=all.\n");
  printf("-s news.server.com  News Server you access. Default %s\n",$CONF{'SERVER'});
  printf("-p portnumber       News Port nntpcached is on. Default %s\n",$CONF{'PORT'});
  printf("\nSend bug reports to jer\@gweep.net\n");
  return 1;
}

# Rewrite the hound list without the newsgroups that match a ban string.
#
# Every line of $MAIN::HF that matches none of the BANSTR patterns is copied
# to the work file $MAIN::WF; the old list is then renamed to $MAIN::BF and
# the work file takes its place.  Ban strings are used as regular
# expressions, exactly as before.
# Takes no arguments.  Returns the result of the final rename, or 0 when a
# file could not be opened or written (the hound list is then left alone).
sub bangrps
{
  # getconf stores a keyword that appears once as a plain string and only
  # builds an array ref when the keyword is repeated.  Accept both forms:
  # dereferencing the plain string used to die under "use strict".
  my $banstr = $CONF{'BANSTR'};
  my @patterns = ref($banstr) eq 'ARRAY' ? @{$banstr}
               : defined($banstr)        ? ($banstr)
               :                           ();

  my $cp = IO::File->new($MAIN::HF, '<');
  if (!$cp) {
    print STDERR "Can't open $MAIN::HF : $!\n";
    return 0;
  }
  unlink($MAIN::WF);
  my $tp = IO::File->new($MAIN::WF, '>');
  if (!$tp) {
    print STDERR "Can't create work file $MAIN::WF : $!\n";
    return 0;
  }

  if ($CONF{'DEBUG'}>=1) {
    print("Removing banned newsgroups.\n");
  }

  while (my $line = <$cp>) {
    chomp $line;
    my $banned = 0;
    foreach my $pattern (@patterns) {
      if ($line =~ /$pattern/) {
        $banned = 1;
        last;               # one matching ban string is enough
      }
    }
    if (!$banned) {
      print $tp "$line\n";
    }
    elsif ($CONF{'DEBUG'}>=3) {
      printf("%s has been banned.\n", $line);
    }
  }
  close($cp);
  # Buffered write errors surface at close; do not rotate a truncated list.
  if (!close($tp)) {
    print STDERR "Can't write work file $MAIN::WF : $!\n";
    return 0;
  }

  if ($CONF{'DEBUG'}>=3) {
    printf("Renaming %s to %s\n", $MAIN::HF, $MAIN::BF);
  }
  rename($MAIN::HF, $MAIN::BF);
  if ($CONF{'DEBUG'}>=3) {
    printf("Renaming %s to %s\n", $MAIN::WF, $MAIN::HF);
  }
  return rename($MAIN::WF, $MAIN::HF);
}

# Build the hound list from the cache history file.
#
# Reads $MAIN::CF, skips lines that start with a number followed by
# whitespace, strips a trailing "/<digits>" component from the others and
# counts how often each remaining string occurs.  The entries are written
# to $MAIN::HF as "<entry> <count>", most frequent first, stopping at the
# first entry whose count is below $CONF{'LOWMARK'}.
# Takes no arguments.  Dies when either file cannot be opened or written.
# Returns 1.
sub collect
{
  my $cp = IO::File->new($MAIN::CF, '<') or die "Can't open $MAIN::CF : $!";

  if ($CONF{'DEBUG'}>=1) {
    print "Collecting newsgroups.\n";
  }
  my %LIST;
  while (my $fs = <$cp>) {
    next if $fs =~ /^\d+\s/;
    chomp $fs;
    $fs =~ s/\/\d+$//;

    if($CONF{'DEBUG'}>=3) {print "Adding $fs\n";}
    $LIST{$fs}++;
  }
  close($cp);

  unlink($MAIN::HF);
  # The result used to be unchecked, so a failed open only showed up later
  # as an error from print.
  my $hp = IO::File->new($MAIN::HF, '>') or die "Can't create $MAIN::HF : $!";
  my $written = 0;
  foreach my $foo (sort {$LIST{$b} <=> $LIST{$a}} keys %LIST) {
    last if ($LIST{$foo} < $CONF{'LOWMARK'});
    print $hp "$foo $LIST{$foo}\n";
    $written++;
  }
  close($hp) or die "Can't write $MAIN::HF : $!";
  if($CONF{'DEBUG'}>=1) {
    # Report what was really written; the LOWMARK cut-off can make this
    # smaller than the number of entries collected.
    print "$MAIN::HF written : ", $written, " entries written\n";
  }
  return 1;
}

# Decide whether an article still has to be fetched.
#
#   $server, $group, $article - used to build the cache path
#   "<CACHEDIR>/<server>/<group with dots turned into slashes>/<article>".
#
# Returns 1 when neither the article file nor its "_xover" companion exists,
# or when header mode ($CONF{'HEAD'}) is on and the "_head" file is missing.
# Returns 0 otherwise, and also when $server or $group is empty.
# At debug level 3 and above the outcome is reported on STDOUT.
sub check
{
  my ($server, $group, $article) = @_;

  return 0 unless ($server && $group);

  # Drop everything up to a leading "./", then map the dotted group name
  # onto a directory path.
  $group =~ s/^.*\.\///;
  $group =~ s/\./\//g;

  my $body  = sprintf("%s/%s/%s/%d", $CONF{'CACHEDIR'}, $server, $group,
                      $article);
  my $xover = $body . "_xover";
  my $head  = $body . "_head";

  my $missing = 0;
  if ( !(-f $body || -f $xover) || ($CONF{'HEAD'} && ! -f $head) ) {
    $missing = 1;
  }

  # This sub runs once per article, hence the level 3 threshold.
  if ($CONF{'DEBUG'}>=3) {
    if ($missing) {
      if ($CONF{'HEAD'}) {
        printf("NH  :> %s is NOT cached\n", $head);
      }
      else {
        printf("NH  :> %s or %s is NOT cached\n", $body, $xover);
      }
    }
    else {
      if ($CONF{'HEAD'}) {
        printf("NH  :> %s is cached\n", $head);
      }
      else {
        printf("NH  :> %s or %s is cached\n", $body, $xover);
      }
    }
  }
  return $missing;
}


# Pull uncached articles (or only their headers) through the news server.
#
# Each line of the hound list $MAIN::HF is read as "<server>/<group> <last>".
# For the first $CONF{'GROUP'} lines (all lines when that is 0 or unset)
# the group is selected on the server and every article number from the
# stored pointer up to the server's high mark is requested, unless check()
# says it is already cached.  The line is then rewritten with the high mark
# as its new pointer.  The new list is built in $MAIN::WF and rotated into
# place, keeping the previous list as $MAIN::BF.
#
# Returns 0 on success, -1 when the hound list, the work file or the server
# connection cannot be opened.
sub fetch
{
  my $fp = IO::File->new($MAIN::HF, '<');
  if (!$fp)
  {
    print STDERR "Hound list not found.\n";
    return (-1);
  }

  unlink($MAIN::WF);
  my $tp = IO::File->new($MAIN::WF, '>');
  if (! $tp)
  {
    print STDERR "Can't create work file.\n";
    return (-1);
  }

  my $nntp_conn = Net::NNTP->new("$CONF{'SERVER'}:$CONF{'PORT'}");
  if(! $nntp_conn) {
    print STDERR "make_connection failed.\n";
    return (-1);
  }

  if ($CONF{'DEBUG'}>=1) {
    print "Fetching news articles.\n";
  }

  my $count=0;
  while (my $line = <$fp>) {
    # Past the -g limit every remaining line is copied through untouched.
    # The old loop left with "last" after the line had already been read,
    # which silently dropped that one entry from the list on every run.
    if (($CONF{'GROUP'} > 0) && ($count >= $CONF{'GROUP'})) {
      print $tp $line;
      next;
    }
    $count++;

    my $entry = $line;
    chomp $entry;
    my($tmp,$last) = split(/\s+/,$entry);

    my($srv,$grp) = split(/\//, defined($tmp) ? $tmp : '', 2);
    if (!defined($grp)) {
      # Not a "<server>/<group>" entry: keep it as it is.
      print $tp $line;
      next;
    }
    $grp =~ s#/#.#g;

    my(undef,$low,$high) = $nntp_conn->group($grp);
    if (!defined($high)) {
      # The group could not be selected.  Keep the stored pointer instead
      # of overwriting it with an empty value.
      if ($CONF{'DEBUG'}>=2) {
        printf("NH  :> cannot select %s, entry left unchanged\n", $grp);
      }
      print $tp $line;
      next;
    }

    # Resume after the last article handled on a previous run.
    if ($last>$low) {
      $low=$last+1;
    }

    if ($CONF{'DEBUG'}>=2) {
      printf("NH  :> %d articles to cache from %s\n", (($high+1)-$low), $grp);
    }
    for(my $loop=$low; $loop<=$high; $loop++)
    {
      if ($CONF{'DEBUG'}>=3)
      {
        printf("NH  :> %d left to cache from %s\n", (($high+1)-$loop), $grp);
      }
      next unless check($srv, $grp, $loop);
      # The reply is discarded; the request is made only for the side
      # effect on the server connection.
      if ($CONF{'HEAD'})
      {
        $nntp_conn->head($loop);
      }
      else
      {
        $nntp_conn->article($loop);
      }
    }
    printf $tp "%s/%s %s\n", $srv, $grp, $high;
  }

  close($tp);
  close($fp);
  if ($CONF{'DEBUG'}>=3) {
    printf("Renaming %s to %s\n", $MAIN::HF, $MAIN::BF);
  }
  rename($MAIN::HF, $MAIN::BF);
  if ($CONF{'DEBUG'}>=3) {
    printf("Renaming %s to %s\n", $MAIN::WF, $MAIN::HF);
  }
  rename($MAIN::WF, $MAIN::HF);
  $nntp_conn->quit();
  return (0);
}

# Reset the article pointer stored in the hound list.
#
#   $mode   - "l" sets the pointer to the server's low mark, "h" to its high
#             mark, "m" to the midpoint between them (rounded down).  Any
#             other value leaves the stored pointer as it is.
#   $sgroup - dotted name of the one newsgroup to update, or "-all-" for
#             every entry.
#
# The list is rewritten through the work file $MAIN::WF and rotated into
# place, keeping the previous list as $MAIN::BF.  Entries that are not
# selected, or whose group the server refuses, are copied unchanged.
# Dies when the list, the work file or the server connection cannot be
# opened.  Returns 0.
sub update
{
  my($mode,$sgroup) = @_;

  my $fp = IO::File->new($MAIN::HF, '<') or die "Hound list $MAIN::HF not found";
  my $tp = IO::File->new($MAIN::WF, '>') or die "Can't create work file $MAIN::WF";

  # Honour the configured port, as fetch does; it used to be ignored here.
  my $peer = $CONF{'SERVER'};
  $peer .= ":$CONF{'PORT'}" if $CONF{'PORT'};
  my $nntp_conn = Net::NNTP->new($peer) or die "make_connection failed";

  while (my $fs = <$fp>) {
    chomp $fs;
    my($tmp,$last) = split(/\s+/,$fs);

    # Split on the first "/" only.  The old pattern './' was a regex whose
    # "." swallowed the last character of the server name, and without a
    # limit it also cut the group off at its first slash.
    my($srv,$grp) = split(/\//, defined($tmp) ? $tmp : '', 2);
    $grp = '' unless defined($grp);
    $grp =~ s#/#.#g;

    my $selected = ($grp ne '') && (($sgroup eq $grp) || ($sgroup eq "-all-"));
    my($low,$high);
    if ($selected) {
      (undef,$low,$high) = $nntp_conn->group($grp);
    }

    if (!$selected || !defined($high))
    {
      # Copy the entry through unchanged.  This used to be
      # "printf $tp, ..." whose stray comma sent the handle itself to
      # STDOUT and dropped the line from the rewritten list.
      print $tp "$fs\n";
      next;
    }

    my $mark = $last;             # unknown mode: keep the current pointer
    if ($mode eq "l") {
      $mark = $low;
    }
    elsif ($mode eq "m") {
      $mark = int($low+(($high-$low)/2));   # article numbers are integers
    }
    elsif ($mode eq "h") {
      $mark = $high;
    }
    if ($CONF{'DEBUG'}>=3) {
      printf("NH  :> setting newsgroup pointer to %s\n", $mark);
    }
    printf $tp "%s/%s %s\n", $srv, $grp, $mark;
  }
  $nntp_conn->quit();
  close($tp);
  close($fp);
  if ($CONF{'DEBUG'}>=3) {
    printf("Renaming %s to %s\n", $MAIN::HF, $MAIN::BF);
  }
  rename($MAIN::HF,$MAIN::BF);
  if ($CONF{'DEBUG'}>=3) {
    printf("Renaming %s to %s\n", $MAIN::WF, $MAIN::HF);
  }
  rename($MAIN::WF,$MAIN::HF);

  return (0);
}

# Print the current settings to STDOUT.
#
#   $_[0] - program name to show in the report.
#
# Reports the values held in %CONF, the number of lines in the hound list
# $MAIN::HF and, when banning is enabled, the configured ban strings.
# Dies when the hound list cannot be opened.  Returns 1.
sub stats
{
  my($prog) = $_[0];

  printf("Setting details.\n\n");
  printf("Program name       :- %s\n", $prog);
  printf("Version number     :- %s\n", $VERSION);
  printf("Cachedir           :- %s\n", $CONF{'CACHEDIR'});
  printf("News server set to :- %s\n", $CONF{'SERVER'});
  printf("News port set to   :- %s\n", $CONF{'PORT'});
  printf("Cache history file :- %s\n", $CONF{'HISTORY'});
  printf("Hound list file    :- %s\n", $CONF{'HOUND'});
  printf("Auto Mode running  :- %d minute(s)\n", $CONF{'MINS'});

  my $fp = IO::File->new($MAIN::HF, '<') or die "file $MAIN::HF not found";
  my @count = <$fp>;
  close($fp);
  # scalar(@count) is the number of lines; $#count is the last index and
  # reported one group too few (-1 for an empty list).
  printf("Newsgroups sorted  :- %d\n", scalar(@count));
  printf("Newsgroup banning is %s.\n", $CONF{'BAN'}?"on":"off");
  if ($CONF{'BAN'})
  {
    # getconf keeps a single BANSTR line as a plain string and only builds
    # an array ref for repeated lines; dereferencing the plain string used
    # to die under "use strict".
    my $banstr = $CONF{'BANSTR'};
    my @patterns = ref($banstr) eq 'ARRAY' ? @{$banstr}
                 : defined($banstr)        ? ($banstr)
                 :                           ();
    print "These words are in the ban list.\n";
    foreach my $str (@patterns) {
      printf("%s\n", $str);
    }
  }
  print "Send bug reports to jer\@gweep.net\n";
  return 1;
}


# Load settings from the configuration file named by $CONF{'CONF'}.
#
# Each line is read as "<keyword> <value>".  Lines starting with ';' or '#'
# are comments.  The keyword is upper-cased and stored in %CONF; a keyword
# that appears more than once in the file is collected into an array ref.
# A keyword that already has a value before the file is read (for example
# DEBUG from -d) is left alone, so the earlier setting takes precedence.
# A missing or unreadable file only produces a warning.
sub getconf {

  my $CNF = IO::File->new($CONF{'CONF'}, '<');
  if (!$CNF) {
    # Warn and carry on with the defaults.  The old "new ... || warn" form
    # stored warn's return value in $CNF and then tried to read from it.
    warn "Can't open $CONF{'CONF'} : $!";
    return;
  }

  # Remember what was set before the file was read.  Without this a value
  # given on the command line was merged with the file's value into an
  # array ref, which breaks numeric tests such as the DEBUG level checks.
  my %preset = map { $_ => 1 } keys %CONF;

  while (my $line = <$CNF>) {
    next if $line =~ /^\s*[;#]/;
    # Non-greedy value so trailing whitespace is not made part of it.
    next unless $line =~ /^\s*(\S+)\s+(.*?)\s*$/;
    my $keyword = uc($1);
    my $value = $2;
    if($CONF{'DEBUG'}>=3) {
      print STDERR "Key $keyword Value $value\n";
    }
    next if $preset{$keyword};

    if($CONF{$keyword}) {
      if(ref $CONF{$keyword} eq 'ARRAY') {
        push(@{$CONF{$keyword}}, $value);
      } else {
        # second occurrence: turn the scalar into a list of both values
        $CONF{$keyword} = [$CONF{$keyword}, $value];
      }
    } else {
      $CONF{$keyword} = $value;
    }
  }
  close($CNF);
  return;
}

# Fill in every setting that neither the command line nor the configuration
# file supplied.  A setting counts as missing when its value is false
# (undefined, empty or 0), as before.
#
# "||=" replaces the original "|= ... unless ..." form.  "|=" is a bitwise
# OR, which only yields the default when the old value is undefined or
# empty; a false-but-set string such as "0" was OR-ed byte by byte with the
# default text instead of being replaced by it.
sub set_defaults  {
  $CONF{'CACHEDIR'} ||= "/u/news/nntpcache/";

  $CONF{'PORT'}     ||= 119;
  $CONF{'HOUND'}    ||= "hound.list";
  $CONF{'HISTORY'}  ||= "cache.history";
  $CONF{'MINS'}     ||= 15;
  $CONF{'PROG'}     ||= $0;
  $CONF{'SERVER'}   ||= "news.gweep.net";
  return;
}

##
## Main Routine
##


# Parse the command line, load the configuration and run the selected mode.
#
# Modes: 1 = fetch (-f), 2 = collect (-c, plus ban filtering with -b),
# 3 = update pointers (-u), 4 = show settings (-S), 5 = automatic fetch
# loop (-A).  -u overrides the others because it is evaluated last.
# NOTE: -F is accepted by getopts but is not used anywhere in this code.
my %opts;
getopts("g:l:s:p:u:n:F:cbd:fhSA?",\%opts);

if($opts{'d'}) {    # 1=report function, 2=misc jobs, 3=while loops 4=Verbose
  $CONF{'DEBUG'} = $opts{'d'};
}

getconf();
set_defaults();

# File locations, all inside the cache directory.
$MAIN::CF = sprintf("%.128s/%.32s", $CONF{'CACHEDIR'}, $CONF{'HISTORY'});   # cache history
$MAIN::HF = sprintf("%.128s/%.32s", $CONF{'CACHEDIR'}, $CONF{'HOUND'});     # hound list
$MAIN::BF = sprintf("%.128s/%.32s.bak", $CONF{'CACHEDIR'}, $CONF{'HOUND'}); # previous hound list
$MAIN::WF = sprintf("%.128s/hound.wf", $CONF{'CACHEDIR'});                  # work file

if($opts{'g'}) {
  $CONF{'GROUP'} = $opts{'g'};
}
if($opts{'l'}) {
  $CONF{'LOWMARK'} = $opts{'l'};
}

my $mode=0;
if ($opts{'f'}) {
  $mode=1;
} elsif($opts{'c'}) {
  $mode=2;
} elsif($opts{'S'}) {
  $mode=4;
} elsif($opts{'A'}) {
  $mode=5;
}

if($opts{'h'}) {
  $CONF{'HEAD'}=1;
}
if($opts{'b'}) {
  $CONF{'BAN'}=1;
}
# Command-line server and port override the configuration file.
if($opts{'s'}) {
  $CONF{'SERVER'} = $opts{'s'};
}
if($opts{'p'}) {
  $CONF{'PORT'} = $opts{'p'};
}

my $app;
if($opts{'u'}) {
  $app = $opts{'u'};
  $mode=3;
}
my($single,$sgroup);
if($opts{'n'}) {
  $single=1;
  $sgroup = $opts{'n'};
}

if($opts{'?'} ) {
  # Show the help and stop.  This used to be "die usage();", which printed
  # the help and then died with usage's return value as the message
  # ("1 at ... line N.").
  usage();
  exit(0);
}

if ($mode == 1) {
  fetch();
}
if ($mode == 2) {
  collect();
  if ($CONF{'BAN'}) {
    bangrps();
  }
}
if ($mode == 3) {
  if ($single) {
    update($app, $sgroup);
  }
  else {
    update($app, "-all-");
  }
}
if ($mode == 4) {
  stats($0);
}
if ($mode == 5) {
  # Automatic mode: fetch, sleep MINS minutes, repeat until killed.
  while(1)
  {
    fetch();
    if ($CONF{'DEBUG'}>=1) {
      printf("NH  :> Sleeping for %d minutes.\n", $CONF{'MINS'});
    }
    sleep($CONF{'MINS'}*60);
  }
}

if (! $mode) {
  printf("%s Version %s\n", $0, $VERSION);
  printf("You need at least one of these options [-c] [-f] [-u] [-A].\n");
  printf("Do :- '%s -?' for help.\n", $0);
  printf("Send bug reports to jer\@gweep.net\n");
}

