|
#!/usr/bin/perl -w
#
# txt2html v0.01 --crb3 09feb01/20feb01
# started out with fanfiction.net stream. then expanded switch-options
# to allow for full pages, set up the title nicely, etc.; still have
# yet to get it to tear off mail-headers.
# this tool doesn't do all the pretty tabling done by the various
# tools on the Win32 side (ws2html etc), it just converts from
# a r.a.a.c-style text posting to something that can be fed to
# ff.net, or (full-page) posted on an internal server.
#
# --crb3 04mar03: if given filename doesn't exist, try tacking ".txt"
#                 onto it and look for that.
# --crb3 05mar04: qualify initial <p> with !$tpt so we don't get <p><html>
# --crb3 18Jul06: surgery on option processing. add sig,summary
# --crb3 14Feb07: -T hardcore toggles: for when no *,_ is within text
#
# Input markup understood by the converter:
#   _text_      -> <i>text</i>
#   *text*      -> <b>text</b>
#   _*text*_    -> <i><b>text</b></i>
#   *!*text*!*  -> <h2>text</h2>
#   --------    -> <hr />            (eight or more dashes)
#   ;<tag>      -> <tag>             (line passed through unescaped)
#   blank line  -> paragraph break (or <br /> when -p is toggled off)
#
use strict;
use warnings;

my $debug = 0;          # get chatty, show internal results
#
# these are command-line switches
#
my $para     = 1;       # use <P></P> paragraphing?
my $hardln   = 0;       # convert \n to <BR>?
my $fullpage = 0;       # generate full page?
my $tpt      = 0;       # got a template for fullpage?
my $template = "";      # here's where its name goes
my $hardtog  = 0;       # hardcore toggles transit on EVERY '_','*'
my $title    = ".";     # '.' means: take the title from the first input line
my $author   = ".";
my $sig      = "";
my $summary  = "";
my ($sigdate, $infile, $outfile);
my $tfil;               # template filehandle, opened only when $tpt is set

my $helps = <<EOT;
txt2html [options] infile [outfile]
options:
  -D              debug
  -f              emit full HTML page, not just a story stream   toggle
  -h              keep hard linebreaks                           toggle
  -p              use <P> paragraphs                             toggle (default)
  -a='AUTHOR'     insert author's name
  -s=sig          (author + sigdate)
  -d=sigdate      alone
  -S='Summary...' or \@summary.file
  -t              title
  -T              hardcore italic, bold toggles
  -i=infile       input file (alternative to the positional argument)
  -o=outfile      output file (alternative to the positional argument)
EOT

die $helps unless @ARGV;

# commandline processing
# $key/$arg live outside the loop so the continue block can report them.
my ($key, $arg);
while (@ARGV and defined($ARGV[0]) and substr($ARGV[0], 0, 1) eq '-') {
    $arg = shift(@ARGV);
    print "ARG>> $arg\n" if $debug;
    # skip the '-', take one switch letter, keep the remainder as its value
    ($key, $arg) = unpack("xaa*", $arg);
    $key = '' unless defined $key;

    if ($key eq 'D') {              # debug
        $debug ^= 1;
        next;                       # toggle it
    }
    elsif ($key eq 'p') {           # use <P> paragraphs
        $para ^= 1;
        next;
    }
    elsif ($key eq 'h') {           # keep hard linebreaks
        $hardln ^= 1;
        next;
    }
    elsif ($key eq 'T') {           # hardcore italic/bold toggles
        $hardtog ^= 1;
        next;
    }

    # now switches that take arg: either "-x=value" or "-x value"
    if (defined($arg) and length($arg)) {
        $arg =~ s/^\s*\=\s*//;
    }
    else {
        $arg = shift(@ARGV);
    }
    # a valued switch given as the last word leaves nothing to shift
    $arg = '' unless defined $arg;

    if ($key eq 'f') {              # emit full HTML page
        $fullpage ^= 1;             # toggle the switch
        $template = $arg;           # '.' means, use internal tpt
        chomp $template;            # ..or use "-f="
        if ($template ne "" and $template ne '.') {
            $tpt = 1;               # is a template filename
            print "using template $template... ";
        }
        else {
            print "using internal template... ";
        }
    }
    elsif ($key eq 'a') {
        $author = $arg;
    }
    elsif ($key eq 't') {
        $title = $arg;
    }
    elsif ($key eq 's') {           # signature-line
        $sig = $arg;
    }
    elsif ($key eq 'S') {           # summary
        $summary = $arg;
    }
    elsif ($key eq 'd') {           # sigline date(s)
        $sigdate = $arg;
    }
    elsif ($key eq 'i') {           # infile
        $infile = $arg;
    }
    elsif ($key eq 'o') {           # outfile
        $outfile = $arg;
    }
    else {
        print "unrecognized switch $key=$arg\n";
    }
}
continue {
    print "key=$key arg=$arg\n" if $debug;
}

$infile  = shift(@ARGV) if defined $ARGV[0];
$outfile = shift(@ARGV) if defined $ARGV[0];

# only switches were given: nothing to convert
die $helps unless defined($infile) and length($infile);

unless (defined($outfile)) {
    # strip only a trailing ".txt", not the first ".txt" found in the path
    ($outfile = $infile) =~ s/\.txt$//;
    $outfile .= '.html';
}
if (defined($ARGV[0])) {
    warn "extra arg ignored: $ARGV[0]\n";
}

$| = 1 if $debug;

$infile  .= ".txt"  unless (-e $infile);
$outfile .= ".html" unless index($outfile, '.html') > -1;
die "oops, infile and outfile are both $infile\n" if $infile eq $outfile;

print "Converting $infile to $outfile";
print " full" if $fullpage;
print "\n";

if (defined($sigdate)) {
    $sig = "--$author" unless length($sig);
    $sig .= " $sigdate";
}

# a leading '@' on the summary or title names a file holding the text
if (index($summary, '@') == 0) {
    (my $sumfile = $summary) =~ s/^\@//;
    $summary = _read_joined($sumfile, 'sumfile');
    print "SUMMARY: $summary\n" if $debug;
}
if (index($title, '@') == 0) {
    (my $titfile = $title) =~ s/^\@//;
    $title = _read_joined($titfile, 'titlefile');
    print "TITLE: $title\n" if $debug;
}

open(my $ifil, '<', $infile)  or die "Can't open infile $infile: $!\n";
open(my $ofil, '>', $outfile) or die "Can't make outfile $outfile: $!\n";

# possibly fetch first line for title use
if ($title eq '.') {
    my $first = <$ifil>;
    if (defined $first) {
        seek($ifil, 0, 0);          # rewind: the line is still converted as body text
        chomp $first;
        ($title = $first) =~ s/^\;//;
    }
}

if ($tpt) {
    open($tfil, '<', $template) or die "can't open template $template: $!\n";
}

#
# #PAYLOAD# tag must be on a line by itself. we print straight thru
# until we get there.
#
my $datacnt = 0;
if ($fullpage) {
    while (defined(my $pgline = tfetch()) ) {
        last if $pgline =~ /^\#PAYLOAD\#/i;
        print $ofil $pgline;
        $datacnt++ if $debug;
        print '<' if $debug;
    }
}
print "\ndatacnt=$datacnt\n" if $debug;

# Open the first paragraph only after any page header has gone out, so the
# tag lands inside the body and always pairs with the closing </p> below.
print $ofil "<p>" if $para;

my ($bold, $ital) = (0, 0);         # used with hardcore-toggles

while (defined(my $inline = <$ifil>)) {
    my $blank = 0;
    chomp $inline;

    if ($inline =~ /^\;\</) {
        substr($inline, 0, 1) = "";         # ;<tag> -> <tag>
    }
    else {
        $inline =~ s/\</&lt;/g;             # escape-out <> before adding
        $inline =~ s/\>/&gt;/g;             # in any tags
    }

    if ($inline =~ /^\s*$/) {               # blank line?
        $inline = $para ? "</p><p>" : "<br />";
        $blank  = 1;
    }

    unless ($hardtog) {
        $inline =~ s#(^|[\s,.!?"'])\_\*(\w)#$1<i><b>$2#g;       # _*bolditalic*_
        # close in reverse order of opening so the tags nest properly
        $inline =~ s#(\w[,.!?"']*)\*\_([\s.,!?"'\-\:]|$)#$1</b></i>$2#g;
        $inline =~ s#(^|[\s.,!?"'\-])\_(\w)#$1<i>$2#g;          # _italic_
        $inline =~ s#(\w[,.!?"'\-\)]*)\_([\s.,!?"'\-\:]+|$)#$1</i>$2#g;
    }
    #
    # --crb3 02dec01: special coding for title...
    #
    $inline =~ s#(^|[\s.,!?"'])\*\!\*(\w)#$1<h2>$2#g;           # *!*Title*!*
    $inline =~ s#(\w[\],.!?"']*)\*\!\*([\s,.!?"'\-\:]|$)#$1</h2>$2#g;

    unless ($hardtog) {
        $inline =~ s#(^|[\s.,!?"'\-])\*(\w)#$1<b>$2#g;          # *bold*
        $inline =~ s#(\w[,.!?"'\-]*)\*([\s,.!?"'\-\:]|$)#$1</b>$2#g;
    }

    if ($hardtog) {                         # must come after title coding
        # every '_' flips italics and every '*' flips bold; the state is
        # kept across lines so a span may run over several input lines
        my $pit = 0;
        while (($pit = index($inline, '_', $pit)) > -1) {
            substr($inline, $pit, 1) = ($ital ? '</i>' : '<i>');
            $ital ^= 1;
        }
        my $pbt = 0;
        while (($pbt = index($inline, '*', $pbt)) > -1) {
            substr($inline, $pbt, 1) = ($bold ? '</b>' : '<b>');
            $bold ^= 1;
        }
    }

    $inline =~ s/\-{8,}/\<hr \/\>/g;
    $inline .= "<br />" if ($hardln and not $blank);
    print $ofil "$inline\n";
    print '.' if $debug;
}
print $ofil "</p>\n" if $para;
print "\n" if $debug;

# now the after-payload tail...
if ($fullpage) {
    while (defined(my $pgline = tfetch())) {
        print $ofil $pgline;
        print '>' if $debug;
    }
}
print "\n" if $debug;

close($ifil);
# buffered write errors only surface at close, so check it
close($ofil) or die "error writing outfile $outfile: $!\n";
close($tfil) if $tpt;
exit;

#
# _read_joined.
# read the whole of FILE and return its lines joined with single spaces.
# WHAT names the kind of file in the error message; dies if FILE can't
# be opened.
#
sub _read_joined {
    my ($file, $what) = @_;
    open(my $fh, '<', $file) or die "can't find $what $file\n";
    chomp(my @lines = <$fh>);
    close($fh);
    return join(' ', @lines);
}

#
# fetch.
# fetch in next line of template. the #PAYLOAD# tag gets passed
# through unaltered. other tags are swapped in here.
# reads from the template file when one was named with -f, otherwise
# from the built-in page after __END__. returns undef at end of template.
#
# what's past here is a plain-jane page framework, just enough
# to put some margins on the page and get through picky browsers.
# I use it to post fics for intranet browsing at home.
#
sub tfetch {
    my $i = $tpt ? <$tfil> : <DATA>;
    return unless defined $i;

    $i =~ s/\#TITLE\#/$title/g;
    $i =~ s/\#AUTHOR\#/$author/g;
    $i =~ s/\#SIG\#/$sig/g;
    $i =~ s/\#SUMMARY\#/$summary/g;
    return $i;
}

__END__
<html>
<head>
<meta http-equiv="Content-Type" CONTENT="text/html; charset=iso-8859-1">
<title>
#TITLE#
</title>
<meta name="Author" content="#AUTHOR#">
<meta name="GENERATOR" content="t2h --crb3 09feb01">
</head>
<body text="#000000" bgcolor="#C0C0C0" link="#0000EE" vlink="#CC0000" alink="#FFFF00">
<center>
<h2>
#TITLE#
</h2>
<table align="center" width="90%">
<tr><td>
#PAYLOAD#
</td></tr>
</table>
<hr>
</center>
</body>
</html>
Grab a gzipped copy here |
| Syntax highlighting using Syntax::Highlight::Engine::Kate |