#!/bin/sh
#
# @(#)proc-news.sh,v 1.12 2004/04/23 06:14:32 kim Exp
#
# Summarize an NNTP syslog extract (nntpd, nntplink and nntpxmit messages).
# Log lines are read from the files named on the command line, or from
# standard input when no file is given; the report goes to standard output.
# NOTE: the awk program below is one single-quoted shell word, so no comment
# or string inside it may contain an apostrophe.
/usr/bin/awk '
# an awk script
# an NNTP log summary report generator
#
# NOTE: for systems that are not as yet using the new 4.3 BSD syslog
# (and therefore have nntp messages lumped with everything else), it
# would be best to invoke this script thusly:
#
#	egrep nntp syslog.old | awk -f nntp_awk > report_of_the_week
#
# because this script will include in the report all messages in the log
# that it does not recognize (on the assumption that they are errors to
# be dealt with by a human).
#
# Erik E. Fair <fair@ucbarpa.berkeley.edu>
# May 17, 1986 - Norwegian Independence Day
#
# Recognize some new things - February 22, 1987
# Erik E. Fair <fair@ucbarpa.berkeley.edu>
#
# fix "xmt is not an array" bug - March 11, 1987
# Change Elapsed/CPU fields to break out time values, HH:MM:SS
# Erik E. Fair <fair@ucbarpa.berkeley.edu>
#
# Add reporting for newnews commands - August 27, 1987
# Erik E. Fair <fair@ucbarpa.berkeley.edu>
#
# Add nntpxmit connection attempt counting/reporting - December 7, 1987
# Erik E. Fair <fair@ucbarpa.berkeley.edu>
#
# Some hacking on 11 Nov 89, tale.  Deal with newsxd and change output
# format a little.
#
# More whacking early December, to stop listing readers on individual machines
# but instead summarize the domain.
#
# 18 Jan 90, tale -- let it recognise new nntplink minute stats.
#
# Overall structure:
#   - the NR == 1 rule works out which field holds the command word
#   - one pattern/action rule per known message type accumulates counters,
#     keyed by the remote host, and sets a flag saying which report section
#     has something to show
#   - anything no rule claims is collected verbatim by the catch-all rule
#   - the END rule prints every section whose flag was set

BEGIN {
  # Domain summarization table used by the readership report.
  # local[suffix] is the number of labels kept in front of the suffix when a
  # reader host is collapsed to "*.<domain>"; hosts under any other suffix
  # keep a single label in front of their last one.
  local["au"] = 2;
  local["br"] = 2;
  local["cn"] = 2;
  local["hk"] = 2;
  local["jp"] = 2;
  local["mx"] = 2;
  local["my"] = 2;
  local["pl"] = 2;
  local["tw"] = 2;
  local["uk"] = 2;
}
NR == 1 {
  # Cope with the different starting columns of real syslog output and of
  # our modified fakesyslog.  Our mods to fakesyslog.c drop the redundant
  # "localhost" and the "nntpd" prepended before each [].  We store nntpd
  # and nntplink output in separate files.
  # The layout is decided once, from the first line only: when field 4 is
  # a bare "[pid]:" the command word is field 6, otherwise it is field 7.
  if ($4 ~ /^\[[0-9]*\]:$/)
    cmdfield = 6;
  else
    cmdfield = 7;
  # the remote host sits just before the command word; the arguments of
  # the command follow it and are addressed by position
  hostfield = cmdfield - 1;
  secondfield = cmdfield + 1;
  thirdfield = secondfield + 1;
  fourthfield = thirdfield + 1;
  fifthfield = fourthfield + 1;
  sixthfield = fifthfield + 1;
  seventhfield = sixthfield + 1;
  eighthfield = seventhfield + 1;
  ninthfield = eighthfield + 1;
  tenthfield = ninthfield + 1;
  eleventhfield = tenthfield + 1;
  twelfthfield = eleventhfield + 1;
  thirteenthfield = twelfthfield + 1;
  fourteenthfield = thirteenthfield + 1;
  # timestamp of the first log line, for the report heading
  starttime = sprintf("%s %2d %.8s", $1, $2, $3);
}
{
  # runs for every line: remember the newest timestamp and the host name
  # (minus any trailing colon) that the rules below use as array index
  lasttime = sprintf("%s %2d %.8s", $1, $2, $3);
  host = $hostfield;
  gsub(/:$/, "", host);
}
# "stats:" in the host position: counters shown in the message id daemon
# section of the report
$hostfield == "stats:" {
  msgid = 1;
  msgid_dups += $seventhfield;
  msgid_new += $ninthfield;
  msgid_cancel += $eleventhfield;
  msgid_freed += $thirteenthfield;
  next;
}
# a reader selected a newsgroup
$cmdfield == "group" {
  readers = 1;
  ng[$secondfield]++;
  next;
}
# one line per article offered to us; "attempted" means the offer itself
# was not allowed and is counted as a refused transfer instead
$cmdfield == "ihave" {
  if ($secondfield == "attempted") {
    noxfer = 1;
    noxfers[host]++;
  } else {
    receive = 1;
    rec[host]++;
    if ($thirdfield == "accepted") {
      rec_accept[host]++;
      if ($fourthfield == "failed") rec_failed[host]++;
    } else if ($thirdfield == "rejected") rec_refuse[host]++;
  }
  next;
}
# this is from version 1.4 of nntpd: per connection totals
# (accepted, rejected, failed) instead of one line per article
$cmdfield == "ihave_stats" {
  receive = 1;
  rec[host] += $thirdfield + $fifthfield + $seventhfield;
  rec_accept[host] += $thirdfield;
  rec_refuse[host] += $fifthfield;
  rec_failed[host] += $seventhfield;
  next;
}
# incoming connection; connect lines with "msgid:" in the host position
# are not counted
$cmdfield == "connect" && $hostfield != "msgid:" {
  systems[host]++;
  next;
}
# nntpxmit/nntplink connection errors
# Ooooh! I *wish* awk had N dimensional arrays,
# so I would not have to throw away the error message here!
$cmdfield == "hello:" || $cmdfield == "socket():" {
  conn[host]++;
  if ($secondfield == "Connection" && $thirdfield == "refused")
    rmt_fail[host]++;
  else
    open_fail[host]++;
  next;
}
# nntplink connection went away
$cmdfield  == "read()" {
  open_fail[host]++;
  next;
}
# we will get stats from this, do not count conn[]
# note that the "xfer" stats offered by nntplink are tabulated a little
# further down from here.
$cmdfield == "xfer:" {
  open_fail[host]++;
# since these are expected to be few in number, we still print
# the exact error (no "next;" statement here).
}
# the remote end answered the greeting but the attempt still counts as a
# remote failure
$cmdfield == "greeted" {
  conn[host]++;
  rmt_fail[host]++;
  next;
}
# name lookup of the remote host failed
$cmdfield == "host" && $secondfield == "unknown" {
  conn[host]++;
  ns_fail[host]++;
  next;
}
# nntplink problems
# we see "sent IHAVE" and the problem response
$cmdfield == "sent" {
  if ($secondfield == "IHAVE") xmt_failed[host]++;
  next;
}
# nntplink problems
# we see "article ID failed(saved)" and the problem response
$cmdfield == "article" && $thirdfield ~ /^failed/ { next; }
# nntpd connection abort - all "broken pipe" right now
$cmdfield == "disconnect:" { next; }
# syslogd "last message repeated" noise
$cmdfield == "repeated" { next; }
# inews noise
$fifthfield == "spooled" { next; }
# high load average
$hostfield == "loadav" { next; }
# nntplink switched log files
/Resetting to use new logfile/ { next; }
# nntplink connection went away
# we do not count this as an error since we already
# have used this connection for real work
/connection timed out while reading reply/ { next; }
# keep the largest value reported in the fourth argument for each host
/Link down for/ {
  linkdowns = 1;
  if ($fourthfield > linkdown[host]) linkdown[host] = $fourthfield;
  next;
}
# end of a session: articles and groups read; any articles read means the
# readership section has something to show
$cmdfield == "exit" {
  if ($secondfield > 0) readers = 1;
  articles[host] += $secondfield;
  groups[host] += $fourthfield;
  next;
}
# CPU (two values, added together) and elapsed time of an outgoing transfer
$cmdfield == "xmit" {
  xmt_cpu[host] += $thirdfield + $fifthfield;
  xmt_ela[host] += $seventhfield;
  next;
}
# CPU (two values, added together) and elapsed time of an incoming session
$cmdfield == "times" {
  cpu[host] += $thirdfield + $fifthfield;
  ela[host] += $seventhfield;
  next;
}
# outgoing transfer totals; the variant whose eleventh argument is
# "connects" carries its own connection count in the tenth argument,
# otherwise every stats line stands for one connection
$cmdfield == "stats" {
  transmit = 1;
  if ($eleventhfield == "connects")
    conn[host] += $tenthfield;
  else
    conn[host]++;
  xmt[host] += $secondfield;
  xmt_accept[host] += $fourthfield;
  xmt_refuse[host] += $sixthfield;
  xmt_failed[host] += $eighthfield;
  next;
}
#
#  For the Nth time, I wish awk had two dimensional associative
#  arrays. I assume that the last request is the same as all the
#  others in this section of logfile.
#
$cmdfield == "newnews" {
  polled = 1;
  poll[host] ++;
  poll_asked[host] = $secondfield;
  next;
}
$cmdfield == "newnews_stats" {
  poll_offered[host] += $thirdfield;
  poll_took[host] += $fifthfield;
  next;
}
# only postings that were not rejected are counted
$cmdfield == "post" {
  if ($secondfield == "rejected")
    next;
  readers = 1;
  post[host]++;
  next;
}
# the pattern is not anchored: any command word containing "timeout"
# (such as "timeout" or "transfer_timeout") is counted here
$cmdfield ~ "(transfer_)?timeout" {
  timeout[host]++;
  timeouts = 1;
  next;
}
# commands the server did not understand; XMODE and XTHREAD are ignored,
# and anything that is not purely alphabetic, or that has an upper case
# letter followed by a lower case one, is lumped together as "Garbage"
$cmdfield == "unrecognized" {
  xcmd = $secondfield
  if (xcmd == "XMODE" || xcmd == "xmode")
    next;
  if (xcmd == "XTHREAD" || xcmd == "xthread")
    next;
  unknown[host]++;
  unrecognized++;
  if (xcmd !~ /^[A-Za-z]+$/ || xcmd ~ /[A-Z][a-z]/)
    unrec["Garbage"]++;
  else
    unrec[xcmd]++;
  curious = 1;
  next;
}
$cmdfield == "refused" {
  splut=1;
  refused[host]++;
  next;
}
# A few things from the Collyer nntpd
/IP address .* denied second IHAVE/ {
  attacked=1;
  aggressors[$secondfield]++;
  next;
}
# only reached when "disconnect:" is not the command word, because the
# earlier disconnect: rule skips those lines
/disconnect: Broken pipe/ {
  broken=1;
  pipebreakers[host]++;
  next;
}
/too many articles/ {
  flooded=1;
  flooders[host]++;
  next;
}
/timed out; / {
  slacked=1;
  slackers[host]++;
  next;
}
# known commands that need no accounting
$cmdfield == "yes" || $cmdfield == "cmd" || $cmdfield == "listgroup" {
  next;
}
### Print anything that we do not recognize in the report
{
  # key on at most eight words starting at the command word, so that
  # identical messages from different hosts collapse into one counted entry
  s = ""
  for (i = cmdfield; i <= NF && i - cmdfield < 8; i++)
    s = s " " $i;
  dups++;
  duperrs[s]++;
  next;
}
END {
  printf "NNTP log summary from %s to %s\n", starttime, lasttime;

  # NOTE(review): no rule in this script sets newsxds or fills newsxd[],
  # so this section never prints as the script stands.
  if (newsxds) {
    printf("\n");
    printf("News Transmission Daemon Activity:\n");
    for (s in newsxd) {
      if (s == "start")
        printf("newsxd starts: %d\n",newsxd["start"]);
      else if (s== "reinit")
        printf("newsxd reinitialisations: %d\n",newsxd["reinit"]);
      else printf("newsxd shut downs by signal %d: %d\n",s,newsxd[s]);
    }
  }

### Article Exchange With Peers (other servers) Statistics
  # any host that polled us, fed us, was fed by us or had a transfer
  # refused is a server; the readership section uses this to skip peers
  if (polled) for(s in poll) servers[s]++;
  if (receive) for(s in rec) servers[s]++;
  if (transmit) for(s in xmt) servers[s]++;
  if (noxfer) for(s in noxfers) servers[s]++;

  if (receive) {
    printf("\n");
    printf("Article Reception        Offered        Took          Toss          Fail\n");
    printf("Contacting Host            To Us     Total  Pct    Total  Pct    Total  Pct\n");
    for(s in rec) {
      nrec += rec[s];
      nrec_accept += rec_accept[s];
      nrec_refuse += rec_refuse[s];
      nrec_failed += rec_failed[s];

      # avoid dividing by zero; the + 0.5 makes the %d conversion round
      they_offered = rec[s];
      if (they_offered == 0) they_offered = 1;
      we_toss = (rec_refuse[s] / they_offered) * 100 + 0.5;
      we_took = (rec_accept[s] / they_offered) * 100 + 0.5;
      we_fail = (rec_failed[s] / they_offered) * 100 + 0.5;

      printf("%-25.25s %6d    %6d %3d%%   %6d %3d%%   %6d %3d%%\n", s, rec[s], rec_accept[s], we_took, rec_refuse[s], we_toss, rec_failed[s], we_fail);
    }

    they_offered = nrec;
    if (they_offered == 0) they_offered = 1;
    we_toss = (nrec_refuse / they_offered) * 100 + 0.5;
    we_took = (nrec_accept / they_offered) * 100 + 0.5;
    we_fail = (nrec_failed / they_offered) * 100 + 0.5;
    printf("------------------------- ------    -----------   -----------   -----------\n");
    printf("%-25s %6d    %6d %3d%%   %6d %3d%%   %6d %3d%%\n", "TOTALS", nrec, nrec_accept, we_took, nrec_refuse, we_toss, nrec_failed, we_fail);
  }

###############################################################################
  if (polled) {
    printf("\n");
    printf("Article Transmission (they poll us)\n");
    printf("System                      Conn  Offrd   Took   Elapsed       CPU  Pct  Groups\n");
    npoll = 0;
    npoll_offered = 0;
    npoll_took = 0;
    npoll_cpu = 0;
    npoll_ela = 0;

    for(s in poll) {
      npoll += poll[s];
      npoll_offered += poll_offered[s];
      npoll_took += poll_took[s];

      if (rec[s]) {
        # this host also feeds us, so its session times cover both
        # directions and are not shown (or totalled) here
        printf("%-25.25s %6d %6d %6d  (see Article Reception)  %s\n", s, poll[s], poll_offered[s], poll_took[s], poll_asked[s]);
      } else {
        npoll_ela += ela[s];
        npoll_cpu += cpu[s];

        # break the second counts out into hours, minutes and seconds;
        # the %d conversions below drop the fractional parts
        e_hours = ela[s] / 3600;
        e_sec   = ela[s] % 3600;
        e_min   = e_sec / 60;
        e_sec   %= 60;

        c_hours = cpu[s] / 3600;
        c_sec   = cpu[s] % 3600;
        c_min   = c_sec / 60;
        c_sec   %= 60;

        tmp = ela[s];
        if (tmp == 0) tmp = 1;
        pct = ((cpu[s] / tmp) * 100.0 + 0.5);

        printf("%-25.25s %6d %6d %6d %3d:%02d:%02d %3d:%02d:%02d %3d%%  %s\n", s, poll[s], poll_offered[s], poll_took[s], e_hours, e_min, e_sec, c_hours, c_min, c_sec, pct, poll_asked[s]);
      }
    }
    printf("\n%-25s %6d %6d %6d", "TOTALS", npoll, npoll_offered, npoll_took);
    if (npoll_ela > 0 && npoll_cpu > 0) {

      e_hours = npoll_ela / 3600;
      e_sec   = npoll_ela % 3600;
      e_min   = e_sec / 60;
      e_sec   %= 60;

      c_hours = npoll_cpu / 3600;
      c_sec   = npoll_cpu % 3600;
      c_min   = c_sec / 60;
      c_sec   %= 60;

      tmp = npoll_ela;
      if (tmp == 0) tmp = 1;
      pct = ((npoll_cpu / tmp) * 100.0 + 0.5);

      printf(" %3d:%02d:%02d %3d:%02d:%02d %3d%%\n", e_hours, e_min, e_sec, c_hours, c_min, c_sec, pct);
    } else {
      # no timing totals to show, but the TOTALS line must still be ended;
      # otherwise the next section swallows its blank separator line
      printf("\n");
    }
  }

###############################################################################
  if (transmit) {
    printf("\n");
    printf("Article Transmission     Offered        Took          Toss          Fail\n");
    printf("Host Contacted             Total     Total  Pct    Total  Pct    Total  Pct\n");
    for(s in xmt) {
      we_offered = xmt[s];
      if (we_offered == 0) we_offered = 1;
      they_toss = (xmt_refuse[s] / we_offered) * 100 + 0.5;
      they_took = (xmt_accept[s] / we_offered) * 100 + 0.5;
      they_fail = (xmt_failed[s] / we_offered) * 100 + 0.5;

      printf("%-25.25s %6d    %6d %3d%%   %6d %3d%%   %6d %3d%%\n", s, xmt[s], xmt_accept[s], they_took, xmt_refuse[s], they_toss, xmt_failed[s], they_fail);

      nxmt        += xmt[s];
      nxmt_accept += xmt_accept[s];
      nxmt_refuse += xmt_refuse[s];
      nxmt_failed += xmt_failed[s];
    }

    we_offered = nxmt;
    if (we_offered == 0) we_offered = 1;
    they_toss = (nxmt_refuse / we_offered) * 100 + 0.5;
    they_took = (nxmt_accept / we_offered) * 100 + 0.5;
    they_fail = (nxmt_failed / we_offered) * 100 + 0.5;
    printf("------------------------- ------    -----------   -----------   -----------\n");
    printf("%-25s %6d    %6d %3d%%   %6d %3d%%   %6d %3d%%\n", "TOTALS", nxmt, nxmt_accept, they_took, nxmt_refuse, they_toss, nxmt_failed, they_fail);

    # connection attempts per host: OK is whatever is left after the name
    # service (NS), network (Net) and remote (Rmt) failures are taken off
    printf("\n");
    printf("Outgoing Transmission Connections       ----------errors----------\n");
    printf("System                      Conn     OK     NS    Net    Rmt   Pct\n");
    for(s in xmt) {
      tot = conn[s];
      if (tot == 0) tot = 1;
      errs = rmt_fail[s] + ns_fail[s] + open_fail[s];
      ok = (conn[s] - errs);
      printf("%-25.25s %6d %6d %6d %6d %6d  %3d%%\n", s, conn[s], ok, ns_fail[s], open_fail[s], rmt_fail[s], (100.0 * errs / tot + 0.5));
      ct_tot += conn[s];
      ct_ok  += ok;
      ct_ns  += ns_fail[s];
      ct_net += open_fail[s];
      ct_rmt += rmt_fail[s];
    }
    tot = ct_tot;
    if (tot == 0) tot = 1;
    errs = ct_ns + ct_net + ct_rmt;
    printf("------------------------- ------ ------ ------ ------ ------ -----\n");
    printf("%-25s %6d %6d %6d %6d %6d  %3d%%\n", "TOTALS", ct_tot, ct_ok, ct_ns, ct_net, ct_rmt, (100.0 * errs / tot + 0.5));
  }

### Article Readership Statistics

  if (readers) {
    printf("\n");
    printf("NNTP readership statistics  Conn Articles Groups Post\n");
    for(s in systems) {

### servers are different animals; they do not belong in this part of the report

      if (servers[s] > 0 && groups[s] == 0 && articles[s] == 0)
        continue;

### report the curious server pokers and refused systems elsewhere

      if (groups[s] == 0 && articles[s] == 0 && post[s] == 0 && noxfers[s] == 0) {
        #if (refused[s] != systems[s]) {
        #  unknown[s] += systems[s];
        #  curious = 1;
        #}
        continue;
      }

      nconn += systems[s];
      nart += articles[s];
      ngrp += groups[s];
      npost += post[s];

      # V7 awk is so damn annoying.  Cannot match against variable patterns.
      # so instead i break apart host name and compare elements from the rear
      nso = split(s, sp, ".");
      if (s ~ /^[0-9.]+$/) {
        # a bare address: the name lookup gave us nothing to summarize
        domain = "DNS unavailable";
      } else if (nso < 2) {
        # a single label has no parent domain; report the host as it is
        # rather than building a name around an empty label
        domain = s;
      } else {
        domain = "*." sp[nso - 1] "." sp[nso];
        for (l in local) {
          # compare the labels of the suffix with the tail of the host name
          nl = split(l, lp, ".");
          ns = nso;
          found = 1;
          while ( nl > 0 ) {
            if ( lp[nl--] != sp[ns--] ) {
              found = 0; nl=0;
            }
          }
          if (found) {
            # keep local[l] labels in front of the last one, but never walk
            # past the first label of a short host name
            ns = nso;
            domain = sp[ns--];
            for (i = 0; i < local[l] && ns > 0; i++) {
              domain = sp[ns--] "." domain;
            }
            domain = "*." domain;
          }
        }
      }

      rep_sys[domain] += systems[s];
      rep_art[domain] += articles[s];
      rep_grp[domain] += groups[s];
      rep_pst[domain] += post[s];
    }
    # the per domain lines go through sort; closing the pipe waits for its
    # output before the totals are printed
    for (r in rep_sys) {
      printf("%-25.25s %6d %8d %6d %4d\n", r, rep_sys[r], rep_art[r], rep_grp[r], rep_pst[r]) | "sort";
    }
    close("sort");
    printf("------------------------- ------ -------- ------ ----\n");
    printf("%-25s %6d %8d %6d %4d\n", "TOTALS", nconn, nart, ngrp, npost);
  }

###############################################################################
  if (timeouts) {
    printf("\n");
    printf("NNTP timeouts\n");
    for(s in timeout) {
      printf("%-35s %5d\n", s, timeout[s]);
    }
  }
  # the "> 0" guards below skip array entries that exist only because an
  # earlier test referenced them
  if (splut) {
    printf("\n");
    printf("Refused connections\n");
    for(s in refused) {
      if (refused[s] > 0)
        printf("%-35s %5d\n", s, refused[s]);
    }
  }
  if (noxfer) {
    printf("\n");
    printf("Refused transfers\n");
    for(s in noxfers) {
      if (noxfers[s] > 0)
        printf("%-35s %5d\n", s, noxfers[s]);
    }
  }
###############################################################################
  # unrecognized commands, counted per host
  if (curious) {
    printf("\n");
    printf("Unrecognized commands\n");
    for(s in unknown) {
      printf("%-35s %5d\n", s, unknown[s]);
    }
  }
###############################################################################
  # unrecognized commands, counted per command word
  if (unrecognized) {
    printf("\n");
    printf("Unrecognized NNTP commands\n");
    for(s in unrec) {
      printf("%-35s %5d\n", s, unrec[s]);
    }
  }
###############################################################################
  if (attacked) {
    printf("\n");
    printf ("Feeds that tried to open a second connection to nntpd\n");
    for(s in aggressors)
      printf("%-35s %5d\n", s, aggressors[s]);
  }
###############################################################################
  if (broken) {
    printf("\n");
    printf ("Feeds that broken the pipe on nntpd\n");
    for(s in pipebreakers)
      printf("%-35s %5d\n", s, pipebreakers[s]);
  }
###############################################################################
  if (slacked) {
    printf("\n");
    printf ("Feeds that got timed out by nntpd\n");
    for(s in slackers)
      printf("%-35s %5d\n", s, slackers[s]);
  }
###############################################################################
  if (flooded) {
    printf("\n");
    printf ("Feeds that had to be interrupted to give others a chance\n");
    for(s in flooders)
      printf("%-35s %5d\n", s, flooders[s]);
  }
###############################################################################
  if (linkdowns) {
    printf("\n");
    printf ("Links that have been down\n");
    for(s in linkdown)
      printf("%-35s %5d\n", s, linkdown[s]);
  }
###############################################################################
  if (msgid) {
    printf("\n");
    printf ("Message id daemon statistics\n");
    printf ("%6d duplicate messages\n", msgid_dups);
    printf ("%6d new messages\n", msgid_new);
    printf ("%6d canceled messages\n", msgid_cancel);
    printf ("%6d freed messages\n", msgid_freed);
  }
###############################################################################
  # everything the catch-all rule collected, with occurrence counts
  if (dups) {
    printf("\n");
    printf("Various error messages that the script cannot process\n")
    for (s in duperrs)
      printf("%3d %s\n", duperrs[s], s);
  }
###############################################################################
}
' "$@"
