#!/usr/bin/perl -w

# Processes HTML templates to produce documentation

# This file is part of CLC-INTERCAL

# Copyright (c) 2023 Claudio Calvelli, all rights reserved.

# CLC-INTERCAL is copyrighted software. However, permission to use, modify,
# and distribute it is granted provided that the conditions set out in the
# licence agreement are met. See files README and COPYING in the distribution.

use strict;
use vars qw($VERSION $PERVERSION);
($VERSION) = ($PERVERSION = "CLC-INTERCAL/Docs aux/mkhtml 1.-94.-2.4") =~ /\s(\S+)$/;

use Language::INTERCAL::Extensions '1.-94.-2.4', qw(load_extension);
use Language::INTERCAL::Splats '1.-94.-2.4', qw(all_splats);
use Language::INTERCAL::Registers '1.-94.-2.4', qw(all_registers);
use Language::INTERCAL::ByteCode '1.-94.-2.4', qw(all_opcodes);

my %splats = (
    NUMBER => [1, 0],
    DESCR  => [0, 2],
    DOC    => [0, 4],
);
my $splats = join('|', map { quotemeta } keys %splats);
$splats = qr/^\s*\@\@SPLATS\s+($splats)\@\@\s*$/;

my %registers = (
    DOUBLE_OH_SEVEN => '%',
    SHARK_FIN       => '^',
    WHIRLPOOL       => '@',
);
my $registers = join('|', map { quotemeta } keys %registers);
$registers = qr/^\s*\@\@REGISTERS\s+($registers)\@\@\s*$/;

my %opcodes = (
    CONSTANTS       => '#',
    REGISTERS       => 'R',
    PREFIXES        => 'P',
    EXPRESSIONS     => 'E',
    STATEMENTS      => 'S',
);
my $opcodes = join('|', map { quotemeta } keys %opcodes);
$opcodes = qr/^\s*\@\@OPCODES\s+($opcodes)\@\@\s*$/;

my %chapters = (
    Charset     => ['charset', 'the chapter on character sets'],
    ArrayIO     => ['input_output', 'the chapter on Input/Output'],
    ReadNumbers => ['input_output', 'the chapter on Input/Output'],
);
my $chapters = join('|', map { quotemeta } keys %chapters);
$chapters = qr/\bL&lt;Language::INTERCAL::($chapters)&gt;/;

@ARGV == 3 or die "Usage: $0 MODULES INPUT OUTPUT\n";
my ($modules, $input, $output) = @ARGV;

load_extension($_) for split(/,/, $modules);

open(IN, '<', $input) or die "$input: $!\n";
open(OUT, '>', $output) or die "$output: $!\n";

LINE:
while (<IN>) {
    if ($_ =~ $splats) {
	my $sort = $1;
	my ($isnum, $field) = @{$splats{$sort}};
	my $ln = $.;
	my $text = '';
	while (<IN>) {
	    if (/^\s*\@\@SPLATS\@\@\s*$/) {
		my @as = all_splats();
		if ($isnum) {
		    @as = sort { $a->[$field] <=> $b->[$field] } @as;
		} else {
		    @as = sort { $a->[$field] cmp $b->[$field] || $a->[0] <=> $b->[0] } @as;
		}
		for my $sp (@as) {
		    my $descr = $sp->[2];
		    my @parms = @{$sp->[3]};
		    $descr =~ s(%){'<I>' . (shift @parms) . '</I>'}ge;
		    my $doc = html($sp->[4]);
		    defined $sp->[5] and
			$doc .= "<BR>\n\tAdded by $sp->[5] extension.";
		    my %t = (
			NUMBER => sprintf('%03d', $sp->[0]),
			DESCR  => $descr,
			DOC    => $doc,
		    );
		    my $t = $text;
		    $t =~ s(\@\@(NUMBER|DESCR|DOC)\@\@){$t{$1}}ge;
		    $t =~ /(\@\@\S+)/
			and die "$input: Invalid data ($1) in \@\@SPLATS\@\@ starting at line $ln\n";
		    print OUT $t or die "$output: $!\n";
		}
		next LINE;
	    } else {
		$text .= $_;
	    }
	}
	die "$input: Unterminated \@\@SPLATS $sort\@\@ started at line $ln\n";
    } elsif ($_ =~ $registers) {
	my $name = $1;
	my $type = $registers{$name};
	my $ln = $.;
	my $text = '';
	while (<IN>) {
	    if (/^\s*\@\@REGISTERS\@\@\s*$/) {
		my @ar = sort { $a->[0] cmp $b->[0] } grep { $_->[4] eq $type } all_registers();
		for my $re (@ar) {
		    my %t = (
			TYPE   => $type,
			NAME   => $re->[0],
			DESCR  => $re->[6],
		    );
		    my $t = $text;
		    $t =~ s(\@\@(TYPE|NAME|DESCR)\@\@){$t{$1}}ge;
		    my $doc = $re->[7];
		    defined $re->[8] and $doc .= "\n\nAdded by $re->[8] extension.";
		    $t =~ s(^(\s*)\@\@DOC\@\@(\s*)$){html($doc, 76, $1, $2)}gem;
		    $t =~ /(\@\@\S+)/
			and die "$input: Invalid data ($1) in \@\@REGISTERS\@\@ starting at line $ln\n";
		    print OUT $t or die "$output: $!\n";
		}
		next LINE;
	    } else {
		$text .= $_;
	    }
	}
	die "$input: Unterminated \@\@REGISTERS $name\@\@ started at line $ln\n";
    } elsif ($_ =~ $opcodes) {
	my $name = $1;
	my $type = $opcodes{$name};
	my $ln = $.;
	my $text = '';
	while (<IN>) {
	    if (/^\s*\@\@OPCODES\@\@\s*$/) {
		my @ao = sort { $a->[2] cmp $b->[2] } grep { $_->[1] eq $type } all_opcodes();
		for my $op (@ao) {
		    my %t = (
			NAME   => $op->[2],
			DESCR  => $op->[0],
		    );
		    my $t = $text;
		    $t =~ s(\@\@(NAME|DESCR)\@\@){$t{$1}}ge;
		    my $doc = $op->[6];
		    defined $op->[7] and $doc .= "\n\nAdded by $op->[7] extension.";
		    $t =~ s(^(\s*)\@\@DOC\@\@(\s*)$){html($doc, 76, $1, $2)}gem;
		    $t =~ /(\@\@\S+)/
			and die "$input: Invalid data ($1) in \@\@REGISTERS\@\@ starting at line $ln\n";
		    print OUT $t or die "$output: $!\n";
		}
		next LINE;
	    } else {
		$text .= $_;
	    }
	}
	die "$input: Unterminated \@\@REGISTERS $name\@\@ started at line $ln\n";
    } elsif (/\@\@/) {
	die $_;
    }
    print OUT $_ or die "$output: $!\n";
}
close OUT or die "$output: $!\n";
close IN;
exit 0;

sub html {
    my ($t, $n, $b, $a) = @_;
    $t =~ s/&/&amp;/g;
    $t =~ s/</&lt;/g;
    $t =~ s/>/&gt;/g;
    $t =~ s/"/&quot;/g;
    $t =~ s/\b([IB])&lt;(.*?)&gt;/<$1>$2<\/$1>/g;
    $t =~ s/\bC&lt;(.*?)&gt;/<CODE>$1<\/CODE>/g;
    $t =~ s($chapters)(<A HREF="$chapters{$1}[0].html">$chapters{$1}[1]</A>)g;
    $t =~ s(<I>([^\W\d]{3})</I>)(<A HREF="#op$1"><I>$1</I></A>)g;
    $t =~ s(<I>%([^\W\d]{2})</I>)(<A HREF="#dos$1"><I>%$1</I></A>)g;
    $t =~ s(<I>\^([^\W\d]{2})</I>)(<A HREF="#shf$1"><I>^$1</I></A>)g;
    $t =~ s(<I>\@([^\W\d]{2,})</I>)(<A HREF="#whp$1"><I>\@$1</I></A>)g;
    $n or return $t;
    my $f = '';
    for my $x (split(/\n+/, $t)) {
	$f =~ s/\Q$a\E$/<BR>$a/;
	$x =~ s/\s+/ /g;
	$x =~ s/^ //;
	$x =~ s/ $//;
	while (length($x) > $n) {
	    my $l = substr($x, 0, $n + 1);
	    $l =~ s/ \S*$//;
	    substr($x, 0, length $l) = '';
	    $x =~ s/^ //;
	    $f .= $b . $l . $a;
	}
	$f .= $b . $x . $a;
    }
    $f;
}

