|
#!/usr/bin/perl -w # # tpp text preprocessor --crb3 16feb04 # # intentionally dirt-simple preprocessor for text. # first iteration: #include, #define. '##' comments. # #include filename(firstline,lastline) is supported. # defines use a "#define (symbol=value)" syntax, so both symbol # and value can include spaces. this is for find-and-replace. # --crb3 19feb04: #ifdef, #ifndef, #else, #endif. # --crb3 20feb04: "#define symbol value" now supported, for single-word # values. if you have spaces in symbol or value, though, you need # the explicit parens-and-equate. # #TODAY# date-tag added. #CHAPNUM# chapter/incrementer tag added. # Neither of these tags will be recognized as such unless they're indented. # --crb3 25feb04: #include now controlled by #ifdef # --crb3 29feb04: (duhhh) #define now controlled by #ifdef too... # --crb3 01mar04: quick #chapnum# hack. # --crb3 03Jul06: add chapnum to filetag # # This tool eats but ignores #commands that it doesn't support. # # use strict; # my $bakpend="~"; # append to filename for backup my $safety=0; my $debug=0; my $chatty=0; my $unsups=0; # pass unrecognized #commands and #comments? my($samename,$tagged,$firstl,$lastl); $samename=$tagged=0; $firstl=$lastl=-1; my $chapnum=1; # # convenience symbol-builtin(s), with #TAG# format # my $today=&get_date; my(@ifstack); $ifstack[0]=1; # ifdef stack, uses un/shifts so [0] is TOS unless(defined($ARGV[0])){ print <<EOT; usage: tpp [options] tppfile -o output filename -t toggle: tag passages with their input filenames -u toggle: pass rather than discard unrecognized directives EOT exit 0; } # commandline processing my($key,$arg,$outfname); while(substr($ARGV[0],0,1) eq '-'){ $arg=shift(@ARGV); print "ARG>> $arg\n" if $debug; ($key,$arg)=unpack("xaa*",$arg); if($key eq 'D'){ # debug $debug ^= 1; next; # toggle it }elsif($key eq 'v'){ # verbose == chatty $chatty++; next; # ..in case levels are used }elsif($key eq 'S'){ # safety $safety ^= 1; next; }elsif($key eq 't'){ # tag passages with their $tagged ^= 1; next; # input filenames }elsif($key eq 'u'){ # pass unsupported #cmds, #cmts? $unsups ^= 1; next; } # now switches that take arg if(defined($arg) and length($arg)){ $arg =~ s/^\s*\=\s*//; }else{ $arg=shift(@ARGV); } if($key eq 'o'){ # output filename $outfname=$arg; }elsif($key eq 'l'){ $arg =~ s/^.*\(//; # parens? eat 'em. $arg =~ s/\).*$//; ($firstl,$lastl)=split(/\,/,$arg,2); $firstl=0+$firstl; # coerce to numeric $lastl=0+$lastl; }else{ print "unrecognized switch $key=$arg\n"; } }continue{ print "key=$key arg=$arg\n" if $debug; } my $aftername; unless(defined($outfname) and length($outfname)){ if($ARGV[0] =~ /\.tpp$/){ # simplest case: test.tpp -> test.txt ($outfname = $ARGV[0] ) =~ s/\.tpp/\.txt/; } } foreach $arg (@ARGV){ $samename++ if $outfname eq $arg; } if($samename){ $aftername=$outfname; $outfname=`mktemp tpp.$$.XXXXXX`; } print "tpp'ing $outfname from @ARGV\n" if ($debug or $chatty); open(O,">$outfname") or die "can't make outfile $outfname\n"; while(defined($ARGV[0])){ tpp(shift(@ARGV)); } close(O); if($samename){ print "renaming $aftername to $aftername$bakpend\n" if ($debug or $chatty); rename("$aftername","$aftername$bakpend"); print "renaming $outfname to $aftername\n" if ($debug or $chatty); rename($outfname,$aftername); } # # tpp. # takes an input filename, possibly a filename(fn,ln) string, # where fn is first line and ln is last line (starting at 0) # to include. # output file is already open at call. # this function reuses filehandle I for all input files. # sub tpp { my ($fname)=(@_); my $fln=-1; my $lln=-1; my($ln,$told,$newfname,$cmd,$sym,$nustate,$assertion,$fore,$aft); my($fulline,$def,$find,@finds,$key); my $linum=-1; my $printit=1; use vars '%fnr'; # I don't yet know why I couldn't use my here. my $chv=$chapnum-1; print O "\n===[$fname][$chv]\n\n" if $tagged; # editing-process convenience # now actively used by tcams if(index($fname,'(')>-1){ my $lines; ($fname,$lines)=split(/\(/,$fname,2); $lines =~ s/\).*$//; ($fln,$lln)=split(/\,/,$lines,2); $fln=0+$fln; $lln=0+$lln; # coerce to numeric for compares } open(I,"<$fname") or die "can't open fname $fname\n"; while( defined($ln=<I>) and index($ln,"__END__") != 0){ $linum++; $fulline=$ln; next if($linum==0 and index($ln,"#!")==0); # allow and ignore shebang lines if($fln != -1){ # firstline-skipping: do dry reads next if $linum < $fln; # until the specified line is reached. } if($lln != -1){ # lastline skipping: drop the file last if $linum > $lln; # right there. } if(substr($ln,0,1) eq '#'){ next if substr($ln,1,1) eq '#'; # '##' is commenting-off. remember chomp $ln; # WordStar/MailMerge? think dots. $ln =~ s/^\#\s*//; # shave it. note the indent-space-eating. if(index($ln,"include")==0){ $ln =~ s/\/{2}.*$//; # whack off any // comment $ln =~ s/\s+$//; # and trailing space. leave (f,l). ($newfname=$ln) =~ s/^include\s+//; if($printit){ $told=tell(I); close(I); tpp($newfname); # <<** RECURSE FOR INCLUDE-FILE **<< open(I,"<$fname") or die "can't reopen fname $fname\n"; seek(I,$told,0); # get back to biz in old one } }elsif(index($ln,"define")==0 or index($ln,"undef")==0){ next unless $printit; # you can hide defines with ifdefs ($def,$ln) = split(/\s+/,$ln,2); # a regex eats the spaces too. print "$def $ln!\n" if $debug; if(index($ln,'(')>-1){ # parens required for macro-paste. $ln =~ s/^.*\(//; # that way spaces can be part of it. $ln =~ s/\)\s*$//; ($fore,$aft)=split('=',$ln,2); }else{ # symbol-assertion. sym=sym. will be $ln =~ s/\s*$//; # in @find, but self-paste-benign. if(index($ln,' ')>-1){ ($fore,$aft)=split(' ',$ln,2); }else{ $fore=$aft=$ln; # for speed, don't use #ifdef symbols } # that also show up in your text. } if($def eq "define"){ $fnr{$fore}=$aft; }else{ delete $fnr{$fore}; } (@finds)=(sort (keys %fnr)); # sort-once read-many ASCIIbetically if($debug){ print "FNR ARRAY:\n"; foreach $key (@finds){ print " $key -> ",$fnr{$key},"\n"; } } # # ifdef/ifndef handling is nestable. the logic only allows a print # if both the new current and newly-pushed TOS states are 1. this is what # you'd expect if ifdef-occluded #ifdef statements were ignored, but # requires less state-maintenance. only simple defines are used. when macro # logic gets powerful, it gets arcane: that's why I needed to write this # little dirt-simple preprocessor in the first place. # }elsif(index($ln,"ifdef")==0 or index($ln,"ifndef")==0){ unshift(@ifstack,$printit); ($cmd,$sym)=split(/\s+/,$ln,2); $sym =~ s/\s+$//; $nustate=exists($fnr{$sym}); $nustate ^= 1 if $cmd eq "ifndef"; $printit &= $nustate; # # 'if' evaluation uses an eval. it's a security risk if it runs # at a privileged level or in a Net-exposed condition. # }elsif(index($ln,"if")==0){ ($assertion=$ln) =~ s/^if\s+//i; if(index($assertion,'(')>-1){ # parens required for multiword. $assertion =~ s/^.*\(//; # that way spaces can be part of it. $assertion =~ s/\)\s*$//; ($sym,$assertion)=split(/\[^A-Za-z0-9_]+/,$assertion,2); }else{ # symbol-assertion. sym=sym. will be $ln =~ s/\s*$//; # in @find, but self-paste-benign. ($sym,$assertion)=split(/\s+/,$assertion,2); $assertion = "eq $assertion" # shortcut default eval: string cmp if $assertion =~ /^[A-Za-z0-9]/; } unshift(@ifstack,$printit); $nustate = ( eval "$fnr{$sym} $assertion" ? 1 : 0 ); $printit &= $nustate; }elsif(index($ln,"else")==0){ # only goes to 1 if $printit is 0. $printit ^= $ifstack[0]; # and $ifstack[0] is 1. }elsif(index($ln,"endif")==0){ # pop to previous state. $printit = shift(@ifstack); }else{ if($unsups){ print O $fulline or die "can't print to $fname; disk full?\n"; next; } } }else{ # not starting with '#'. we don't revisit looking for pasteds. # # in the find-and-replace loop below, pretesting with index() before # committing to invoking/compiling a regex saves us BUNCHES of time: 8 secs # vs 61 secs in one test (where the no-defines time was 7 secs). yes you # can chain defines if you can sequence them ASCIIbetically. # fnr's and specials like CHAPNUM are only done on printing text. # hack: lowercase #chapnum# uses prior number, presuming #CHAPNUM# # is used first, once, to advance the number. this allows the number # to be repeated within the text. # if($printit){ foreach $find (@finds){ # macro expansions first, so they # control the tweaky stuff. $ln =~ s/$find/$fnr{$find}/sg if index($ln,$find) >-1; } if(index($ln,'#CHAPNUM#') >-1){ $ln =~ s/\#CHAPNUM\#/$chapnum++/se; } if(index($ln,'#chapnum#') >-1){ $ln =~ s/\#chapnum\#/$chapnum-1/gse; } if(index($ln,'#TODAY#') >-1){ $ln =~ s/\#TODAY\#/$today/sge; } if(index($ln,"\\n") >-1){ # verbatim, after macro expansion $ln =~ s/\\n/\n/sg; # ..to get line-expanding macros } print O $ln or die "can't print to $fname; disk full?\n"; } } } close(I); } # # get_date. # # return just a MILdate without time. # sub get_date { my $today=&get_datime; substr($today,0,9)=""; return($today); } # # get_datime. # # return a MIL-format-date and time string. # sub get_datime{ my($rightnow,$sec,$min,$hour,$mday,$mon,$yr,$amon); ($sec,$min,$hour,$mday,$mon,$yr,,,,) = localtime(); $yr %= 100; $amon = (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec))[$mon]; $rightnow = ($hour<10?"0":"")."$hour:".($min<10?"0":"")."$min:".($sec<10?"0":"")."$sec "; $rightnow .= ($mday<10?"0":"")."$mday$amon".($yr<10?"0":"")."$yr"; return $rightnow; } |
Grab a gzipped copy here |
| Syntax highlighting using Syntax::Highlight::Engine::Kate |