#!/usr/local/bin/perl --	# -*-Perl-*-
##############################################################################
#
# m5 is a poor-man's m4
# 
# written in March of 1991 by Bill Mann; tested with v3.044 of perl
#    bill@ksr.com (ksr!bill@uunet.uu.net)
#
# This program may be used or modified for any purpose, but I would
# appreciate it if you would reports bugs and fixes to me, and credit
# yourself with any modifications you make.  -bill
#
# I have mailed Larry Wall a fix for the 'perl bug' mentioned in the
# comments below.  Once this bug is fixed, removing the 'redundant'
# double-quotes will provide some improvement in speed; the speed-up
# will be application dependent.  -bill
#
##############################################################################

#-- where I got it -----------------------------------------------------------
#From: Adrian F Clark <alien@sx.ac.uk>
#To: partain
#Subject: Re: Macro-processor in Perl?
#Date: Thu, 25 Jul 91 09:43:05 BST
#
#"Ask and ye shall receive" ... this sounds like just what we both
#want!
#
#..a
#
#Date: Wed, 24 Jul 91 18:09:30 EDT
#From: bill%ksr.com@uk.ac.essex.mailhost
#To: "Adrian F Clark" <alien%sx.ac.uk@uk.ac.essex.mailhost>
#In-Reply-To: alien@essex.ac.uk's message of 24 Jul 91 09:32:13 EDT
#Subject: Re: Macro-processor in Perl?
#Sender: bill%ksr.com%ksr%uunet.uu.net@uk.ac.essex.mailhost
#
#I wrote a m4 clone, with just a couple of features missing, and a
#couple added, because the versions of m4 we had in-house, mostly from
#SUN, were either very old or buggy.
#
#The major missing feature is multiple separate output files.  The
#major addition is that eval() now does a full-fledged perl eval, which
#'just happens' to include everything the m4 eval includes, in the same
#syntax.
#
#This program is in daily use, and has been tested by the author, but
#it is only used for a few applications, so problems may still be
#lurking.  I'll be happy to answer a few questions, but can't promise
#to spend too much time, and I provide no guarantees.  I call it m5
#just keep it distinct from m4.
#-----------------------------------------------------------------------------

# Auto identification
#$program_path = $0;
#$program_name = $program_path;
#$program_name =~ s:^.*/::;
#$cmd_line = join(' ', $program_name, @ARGV);
#$rcs_id = '$Id: m5,v 1.1 1991/03/12 19:48:13 bill Exp $ ';
select (STDERR); $| = 1; select (STDOUT);
#print STDERR "$program_path ", '$Revision: 1.1 $ ', "\n";
#print STDERR $rcs_id, "\n";


#eval "print STDERR \$die='Unknown parameter $1\n' if !defined \$$1; \$$1=\$';"
#    while ($ARGV[0] =~ /^(\w+)=/ && shift(@ARGV));
#exit 255 if $die;		# process any FOO=bar switches

%D = ("changecom", "\000changecom",
      "changequote", "\000changequote",
      "decr", "\000decr",
      "define", "\000define",
      "defn", "\000defn",
      "dnl", "\000dnl",
      "dumpdef", "\000dumpdef",
      "errprint", "\000errprint",
      "eval", "\000eval",
      "ifdef", "\000ifdef",
      "ifelse", "\000ifelse",
      "include", "\000include",
      "incr", "\000incr",
      "index", "\000index",
      "len", "\000len",
      "maketemp", "\000maketemp",
      "m4exit", "\000m4exit",
      "m4wrap", "\000m4wrap",
      "popdef", "\000popdef",
      "pushdef", "\000pushdef",
      "repeat", "\000repeat",
      "shift", "\000shift",
      "substr", "\000substr",
      "syscmd", "\000syscmd",
      "undefine", "\000undefine",
      "unix", "",
      );

$ccp = 0; $file = '';
@inc = '';
while ($ARGV[0] =~ /^-./) {
    $_ = shift(@ARGV);
    /^-D(\w+)(=(.*))?$/ && ($D{$1} = (defined($3)? $3 : ''), next);
    /^-U(\w+)$/ && (delete $D{$1}, next);
    /^-e$/ && (next);		# not implemented yet -bill
    /^-s$/ && ($ccp = 1, next);
    /^-I(.*)$/ && (push(@inc, ($1 || shift(@ARGV)) . '/'), next);
    die "unrecognized switch: $_\n";
}

# other globals: @args, $ab, $mi, $lq, $rq, $skip
$ab = $mi = 0;			# undef would be ok
$m4wrap = '';

$lcom = "\#";
$rcom = "\n";
&changequote;

@ARGV || unshift(@ARGV, '-');

for (shift(@ARGV)) {		# 'feature': each file must end cleanly
    print &expand('', &openf($_));
}
$m4wrap ne '' && print &expand($m4wrap, '');
exit;

sub openf {
    $ARGV = $_[0];
    $FH = "F" . @FH;
    open($FH, $ARGV) || die "Can't open '$ARGV': $@";
    $line = 0;
    $ccp && printf ("#line %s \"%s\"\n", 1, $ARGV);
    $FH;
}

# $st states:
# 0:  normal text, or reading macro arguments if $pf != 0
# 1:  quoted string
# 2:  comment
# 3:  starting to read an argument (skipping white space)

# expand expands its input string, removing one layer of matching quotes;
# it expands defined symbols as long as they are not between quotes.
# whenever it reaches the end of the string it reads a new line and extends
# its input if possible; if it can't it returns errors if in a macro 
# argument or in a quoted string.
sub expand {
    local($in,$FH) = @_;
    local($i,$st,$qn,$pn) = (0,0,0,0);	# used recursively
    local($ci,$pi,$t);		# not used recursively, but declared anyway

    while (1) {			# for 'each' character
	if ($i == length($in)) {
	    if ($FH) {
		unless ($pn) {	# defer output while reading macro arguments
		    print $in;
		    $in = '';
		    $i = $ci = 0;
		}

		if ($t = <$FH>) {
		    $in .= $t;
		    $line++;
		    next;	# process next line
		}

		close($FH);

		if (@argv) {
		    $FH = pop(@FH);
		    $ARGV = pop(@argv);
		    $line = pop(@line);
		    $in .= pop(@in);
		    $ccp && printf ("#line %s \"%s\"\n", $line, $ARGV);
		    next;
		}
	    }
	    $st == 1 && die "missing close quote\n";
	    $st == 2 && die "missing close comment\n";
	    $pn && die "missing close parenthesis\n";
	    last;
	}
	
##	substr($in, $i) =~ $skip && ($i += length($&)) == length($in) && next;
	substr($in, $i) =~ /^[ \t0-9]+/ &&
	    ($i += length($&)) == length($in) && next;

	if ($st == 1) {		# if in a quoted string
	    if (substr($in, $i, length($rq)) eq $rq) {
		if (--$qn) {
		    $i += length($rq);
		} else {
		    substr($in, $i, length($rq)) = '';
		    $st = 0;
		}
		next;
	    }
	    if (substr($in, $i, length($lq)) eq $lq) {
		++$qn;
		$i += length($lq);
		next;
	    }
	    ++$i;
	    next;
	}
	elsif ($st == 2) {	# if in a comment
	    if (($i = index($in, $rcom, $i)) == -1) { # extends past eol
		$i = length($in);
		next;
	    }

	    $i += length($rcom);
	    if ($pn) {		# if in parens
		substr($in, $ci, $i-$ci) = '';
		$i = $ci;
		$rcom eq "\n" && (substr($in, $i++, 0) = "\n");
	    }
	    $st = 0;
	    next;
	}
	elsif ($st == 3) {		# if starting an argument
	    substr($in, $pi, $i-$pi+1) =~ s/^(\s*)(\S)/$2/ &&
		($i -= length($1), $st = 0);
	}

	if (substr($in, $i, length($lq)) eq $lq && $lq) {
	    substr($in, $i, length($lq)) = '';
	    $st = $qn = 1;
	    next;
	}

	if (substr($in, $i, length($lcom)) eq $lcom && $lcom) {
	    $i += length($lcom);
	    $ci = $i;
	    $st = 2;
	    next;
	}

	if ($pn) {
	    $t = substr($in, $i, 1);
	    if ($t eq '(') {
		++$pn;
		++$i;
		next;
	    }
	    elsif ($t eq ',') {
		if ($pn == 1) {
		    push(@args, substr($in, $pi, $i-$pi));
		    $pi = $i+1;
		    $st = 3;
		}
		++$i;
		next;
	    }
	    elsif ($t eq ')') {
		if (--$pn == 0) {
		    push(@args, substr($in, $pi, $i-$pi));
		    ++$i;
		    &callm;
		    $pi = pop(@pi);
		    $pn = pop(@pn);
		    next;
		}
		++$i;
		next;
	    }
	}

	substr($in, $i) =~ /^([a-z_]\w*)(\()?/i || (++$i, next);
	defined($D{$1}) || ($i += length($1), next);

	push(@ab, $ab);
	push(@args, $1);
	$ab = $#args;
	push(@mi, $mi);
	$mi = $i;
	$i += length($&);

	if ($2) {
	    push(@pi, $pi);
	    push(@pn, $pn);
	    $pi = $i;
	    $pn = 1;
	    $st = 3;
	    next;
	}

	&callm;
    }
    $in;
}

sub callm {
    
    if (($t = $D{$args[$ab]}) =~ s/^\000//) {
	$t = &$t;		# may recurse here
    } else {
	$t =~ s/\$(\#|\*|\@|\d)/$1 eq '#' ? $#args-$ab :
	    $1 eq '*' ? join(',', @args[$ab+1..$#args]) :
		$1 eq '@' ? join(',', &quotelist(@args[$ab+1..$#args])) :
		    $args[$ab+$1]/ge;
    }

    $ccp && 
	(substr($in, $mi, $i-$mi) =~ tr/\n/\n/) != ($t =~ tr/\n/\n/) && 
	    ($t =~ s/\n$//,
	     $t .= sprintf("\n#line %s \"%s\"\n", $line, $ARGV));
    substr($in, $mi, $i-$mi) = $t;

    $i = $mi;			# rescan
    $#args = $ab-1;
    $mi = pop(@mi);
    $ab = pop(@ab);
}

sub quotelist {
    local(@T);
    for (@_) {
	push(@T, $lq . $_ . $rq);
    }
    @T;
}

sub setskip {
    ($skip = "$lcom$rcom$lq$rq(,)") =~ s/(\W)/\\$1/g;
    $skip = "^[^a-zA-Z_$skip]+";
}

# m5 functions:

sub changecom {
    $lcom = $args[$ab+1];
    $rcom = $args[$ab+2] || "\n";
    &setskip;
    "";
}

sub changequote {
    $lq = $args[$ab+1];
    $rq = $args[$ab+2] || $lq;
    if ($#args <= $ab) {	# if no arguments at all
	$lq = "\`";
	$rq = "\'";
    }
    &setskip;
    "";
}

sub decr {
    $t = $args[$ab+1] - 1;
    "$t";			# quotes required for perl bug
}

sub define {
    $args[$ab+1] =~ /^[a-z_]\w*$/i || die "bad macro name: '$args[$ab+1]'\n";
    $D{$args[$ab+1]} = $args[$ab+2];
    "";
}

sub defn {
    $t = '';
    for (@args[$ab+1..$#args]) {
	$t .= $lq . $D{$_} . $rq;
    }
    $t;
}

sub dnl {
    substr($in, $i) =~ s/[^\n]*\n?//;
    "";
}

sub dumpdef {
    for (sort(keys %D)) {
	if (($t =  $D{$_}) =~ s/^\000//) {
	    printf STDERR ("%s:\t<%s>\n", $_, $t);
	} else {
	    printf STDERR ("%s:\t%s%s%s\n", $_, $lq, $t, $rq);
	}
    }
    "";
}

sub errprint {
    print STDERR $args[$ab+1], "\n";
    "";
}

sub eval {
    local($r) = int($args[$ab+2]);
    package m5;
    $main't = eval $main'args[$main'ab+1];  #'];
    package main;
    $@ && die "eval($args[$ab+1]) failed: $@\n";
    if ($r > 1 && $t =~ s/^(-)?(\d+)$//) {	# if a radix is specified
	local($d) = $2;
	do {
	    $t = ('0'..'9','A'..'Z')[$d % $r] . $t;
	    $d = int($d / $r);
	} while $d;

	$d = $args[$ab+3] - length($t);
	$d > 0 && ($t = ('0' x $d) . $t);
	$t = $1 . $t;
    }
    "$t";			# quotes required for perl bug
}

sub ifdef {
    defined($D{$args[$ab+1]}) ? $args[$ab+2] : $args[$ab+3];
}

sub ifelse {
    for ($t = $ab+1; $t < $#args; $t += 3) {
	$args[$t] eq $args[$t+1] && return "$args[$t+2]";
    }
    return "$args[$t]";		# quotes required for perl bug
}

sub include {
    for (@inc) {		# search -I list
	$t = "$_$args[$ab+1]";
	if (-f $t) {
	    push(@FH, $FH);
	    push(@argv, $ARGV);
	    push(@line, $line);
	    push(@in, substr($in, $i));
	    substr($in, $i) = '';
	    &openf($t);
	    return "";
	}
    }
    die "Can't find include file $args[$ab+1]";
}

sub incr {
    $t = $args[$ab+1] + 1;
    "$t";			# quotes required for perl bug
}

sub index {
    $t = index($args[$ab+1], $args[$ab+2]);
    "$t";			# quotes required for perl bug
}

sub len {
    $t = length($args[$ab+1]);
    "$t";			# quotes required for perl bug
}

sub maketemp {
    ($t = $args[$ab+1]) =~ s/X{1,6}$/a$$/; # via experiments
    "$t";			# quotes required for perl bug
}

sub m4exit {
    exit($args[$ab+1]);
}

sub m4wrap {
    $m4wrap = $args[$ab+1];
    "";
}

sub popdef {
    eval "\$D{$args[$ab+1]} = pop(@M5'$args[$ab+1])";
    "";
}

sub pushdef {
    eval "push(@M5'$args[$ab+1], \$D{$args[$ab+1]})";
    $D{$args[$ab+1]} = $args[$ab+2];
    "";
}

sub repeat {	# repeat(#, expr); evals expr # times; $0 = 0..#-1
    local($t);
    for ($args[$ab] = 0; $args[$ab] < $args[$ab+1]; $args[$ab]++) {
	$t .= &expand($args[$ab+2], '');
    }
    $t;
}

sub shift {
    join(',', &quotelist(@args[$ab+2..$#args]));
}

sub substr {
    $t = substr($args[$ab+1], $args[$ab+2], $args[$ab+3] || 1000000000);
    "$t";			# quotes required for perl bug
}

sub syscmd {
    $m5'sysval = system($args[$ab+1]);	#' this is wrong, but close
    "";
}

sub undefine {
    delete $D{$args[$ab+1]};
    "";
}

- ---------------------- end ----------------------

