package Language::INTERCAL::Optimiser;

# Optimiser for INTERCAL bytecode; see also "optimise.iacc"

# This file is part of CLC-INTERCAL

# Copyright (c) 2006-2008, 2023 Claudio Calvelli, all rights reserved.

# CLC-INTERCAL is copyrighted software. However, permission to use, modify,
# and distribute it is granted provided that the conditions set out in the
# licence agreement are met. See files README and COPYING in the distribution.

# The plan for this is to have optimisers defined by INTERCAL programs,
# by introducing a new OPTIMISE statement or else a suitable modification
# of CREATE.  However the current version will just have a few predefined
# rules so that we can implement the mechanism behind the optimiser.

use strict;
use vars qw($VERSION $PERVERSION);
($VERSION) = ($PERVERSION = "CLC-INTERCAL/Base INTERCAL/Optimiser.pm 1.-94.-2.4") =~ /\s(\S+)$/;

use Carp;
use Language::INTERCAL::Exporter '1.-94.-2.1';
use Language::INTERCAL::Splats '1.-94.-2.2', qw(faint SP_INTERNAL SP_TODO);
use Language::INTERCAL::ByteCode '1.-94.-2.4', qw(
    BC BCget bc_skip bytedecode NUM_OPCODES
    BC_HSN BC_INT BC_OSN BC_RIN BC_RSE BC_SEL
);

use constant match_bytecode   => 0;
# match_statement to match_constant MUST have four consecutive values. Because I say so
use constant match_statement  => 1;
use constant match_expression => 2;
use constant match_assignable => 3;
use constant match_register   => 4;
use constant match_constant   => 5;

use constant rewrite_bytecode => 0;
use constant rewrite_match    => 1;
use constant rewrite_eval     => 2;

# bytecode types acceptable for matches
my @types;
$types[match_statement] = { map { ($_ => undef) } qw(S) };
$types[match_expression] = { map { ($_ => undef) } qw(E A R C), '#' };
$types[match_assignable] = { map { ($_ => undef) } qw(A R C), '#' };
$types[match_register] = { map { ($_ => undef) } qw(R) };
$types[match_constant] = { map { ($_ => undef) } qw(C), '#' };

sub new {
    @_ == 1 or croak "Usage: new Language::INTERCAL::Optimiser";
    my ($class) = @_;
    my @opt;
    $opt[BC_RIN] = [[[[match_expression], [match_expression]],
		     [[rewrite_bytecode, pack('C*', BC_INT)], [rewrite_match, 1], [rewrite_match, 0]]]];
    $opt[BC_RSE] = [[[[match_expression], [match_expression]],
		     [[rewrite_bytecode, pack('C*', BC_SEL)], [rewrite_match, 1], [rewrite_match, 0]]]];
    bless \@opt, $class;
}

sub add {
    @_ == 4 or croak "Usage: OPTIMISER->add(OPCODE, PATTERN, REWRITE)";
    my ($opt, $opcode, $pattern, $rewrite) = @_;
    faint(SP_TODO, "Optimiser::add");
    #XXX parse and verify $pattern and $rewrite
    #push @{$opt->[$opcode]}, [$pattern, $rewrite];
    #$opt;
}

sub optimise {
    @_ == 2 or croak "Usage: OPTIMISER->optimise(CODE)";
    my ($opt, $code) = @_;
    _optimise($opt, length $code, $code);
}

sub read {
    @_ == 2 or croak "Usage: OPTIMISER->read(FILEHANDLE)";
    my ($opt, $fh) = @_;
    $fh->read_binary(pack('vv', 0, 0));
}

sub write {
    @_ == 2 or croak "Usage: Language::INTERCAL::Optimiser->write(FILEHANDLE)";
    my ($class, $fh) = @_;
    $fh->write_binary(4);
    $class->new();
}

sub _optimise {
    my ($opt, $end) = @_;
    my $byte = vec($_[2], 0, 8);
    if ($end > 1 && $opt->[$byte]) {
	# see if a rule applies
    RULE:
	for my $rule (@{$opt->[$byte]}) {
	    my ($pattern, $rewrite) = @$rule;
	    my @match;
	    my $start = 1;
	    for my $item (@$pattern) {
		my $match = $item->[0];
		if ($match == match_bytecode) {
		    my $bc = $item->[1];
		    substr($_[2], $start, length $bc) eq $bc or next RULE;
		    my $length = length $bc;
		    push @match, [$start, $start + $length];
		    $start += $length;
		} elsif ($match >= match_statement && $match <= match_constant) {
		    $start < $end or next RULE;
		    $byte = vec($_[2], $start, 8);
		    my $type = (bytedecode $byte)[2];
		    defined $type && exists $types[$match]{$type} or next RULE;
		    my $orig = $start;
		    bc_skip($_[2], \$start, $end) or next RULE;
		    my $len = $start - $orig;
		    push @match, [$len, substr($_[2], $orig, $len)];
		} else {
		    # how on Earth did we get here?
		    faint(SP_INTERNAL, "Invalid encoded pattern in optimiser");
		}
	    }
	    # this rule matches, now see how we rewrite it
	    my $newcode = '';
	    for my $item (@$rewrite) {
		my $action = $item->[0];
		if ($action == rewrite_bytecode) {
		    $newcode .= $item->[1];
		} elsif ($action == rewrite_match) {
		    my $mn = $item->[1];
		    $mn >= 0 && $mn < @match or faint(SP_INTERNAL, "Invalid match in optimiser");
		    $newcode .= _optimise($opt, @{$match[$mn]});
		} elsif ($action == rewrite_eval) {
		    faint(SP_TODO, "rewrite_eval");
		} else {
		    # how on Earth did we get here?
		    faint(SP_INTERNAL, "Invalid encoded rewrite in optimiser");
		}
	    }
	    if ($start < $end) {
		my $len = $end - $start;
		$newcode .= _optimise($opt, $len, substr($_[2], $start, $len));
	    }
	    return $newcode;
	}
    }
    # no rule matched, check sub-sequences
    my (undef, undef, undef, undef, $expect) = bytedecode($byte);
    $expect or return $_[2];
    my $newcode = substr($_[2], 0, 1);
    my $start = 1;
    while ($expect ne '' && $start < $end) {
	my $e = substr($expect, 0, 1, '');
	# if this is constant(list) handle the list specially
	if ($expect ne '' &&
	    $start < $end &&
	    ($e eq 'C' || $e eq '#') &&
	    substr($expect, 0, 1) eq '(')
	{
	    my $const = $start;
	    my $count = eval { BCget($_[2], \$start, $end); };
	    $@ and return $_[2];
	    my $level = 1;
	    my $pos = 1;
	    while ($level > 0) {
		$pos >= length $expect and return $_[2];
		my $c = substr($expect, $pos++, 1);
		if ($c eq '(') {
		    $level++;
		} elsif ($c eq ')') {
		    $level--;
		}
	    }
	    $newcode .= pack('C*', BC($count));
	    my $subarg = substr($expect, 0, $pos, '');
	    $count and $expect = (substr($subarg, 1, $pos - 2) x $count) . $expect;
	    next;
	}
	# constant opcode needs to be left unchanged
	if ($e eq 'O') {
	    my $byte = vec($_[2], $start, 8);
	    my $len = 1;
	    if ($byte == BC_HSN) {
		$len++;
	    } elsif ($byte == BC_OSN) {
		$len += 2;
	    }
	    $start + $len > $end and return $_[2];
	    $newcode .= substr($_[2], $start, $len);
	    $start += $len;
	    next;
	}
	# any byte, like the contents of STR(), also needs to be left unchanged
	if ($e eq 'N') {
	    $start >= $end and return $_[2];
	    $newcode .= substr($_[2], $start, 1);
	    $start++;
	    next;
	}
	# left grammar rule is an abbreviation for count, position, symbol
	if ($e eq '<') {
	    $expect = '##E' . $expect;
	    next;
	}
	# right grammar rule; it's followed by a constant indicating what's next
	if ($e eq '>') {
	    $start >= $end and return $_[2];
	    my $nc = substr($_[2], $start, 1);
	    my $byte = ord($nc);
	    $newcode .= $nc;
	    if ($byte == NUM_OPCODES || $byte == NUM_OPCODES + 1 || $byte == NUM_OPCODES + 3 || $byte == NUM_OPCODES + 6) {
		# position, symbol / number
		$expect = 'EE' . $expect;
		next;
	    }
	    if ($byte == NUM_OPCODES + 15) {
		# "splat", no other data
		next;
	    }
	    if ($byte == NUM_OPCODES + 4) {
		# length; block of bytecode
		my $prev = $start;
		$byte = vec($_[2], $start++, 8);
		if ($byte >= NUM_OPCODES) {
		    $byte -= NUM_OPCODES;
		} elsif ($byte == BC_HSN) {
		    $start >= $end and return $_[2];
		    $byte = vec($_[2], $start++, 8);
		} elsif ($byte == BC_OSN) {
		    $start + 1 >= $end and return $_[2];
		    # can't use vec(..., 16) because it may not be 16-bit aligned
		    $byte = vec($_[2], $start++, 8) << 8;
		    $byte |= vec($_[2], $start++, 8);
		} else {
		    return $_[2];
		}
		# we copy the bytecode as is, we do not optimise it because we do
		# not know in which context it may appear; however if this rule
		# ends up generating code, that will go through the optimiser
		$start += $byte;
		$start > $end and return $_[2];
		$newcode .= substr($_[2], $prev, $start - $prev);
		next;
	    }
	    # unknown type
	    return $_[2];
	}
	# anything else, we can call _optimise() recursively
	my $part = $start;
	bc_skip($_[2], \$start, $end) or return $_[2];
	my $len = $start - $part;
	$len or return $_[2];
	$newcode .= _optimise($opt, $len, substr($_[2], $part, $len));
    }
    $expect ne '' || $start > $end and return $_[2];
    # and finally, if there's more code following, optimise that too
    if ($start < $end) {
	my $len = $end - $start;
	$newcode .= _optimise($opt, $len, substr($_[2], $start, $len));
    }
    return $newcode;
}

1;
