|
#!/usr/bin/perl -w # # tburster --crb3 16feb04/22jan07/ # # breaks up a text file into labeled subfiles # # #extract /filename # -or- # #extract ...filename is then generated from template/count # # -d directory to put output # -t template for filename # XXXXX in filename is replaced by count-sequence # --checks and skips existing tempfnames # -n number to start count at # # explicits can have >> prefix, in which case they append. # thus, this extractor can separate out two or more streams of text # into place, not just into parcel-files. # # --crb3 22Jan07: add $use_ch, option -u, to use #CH as well as #extract # # -r regeneration -- overwrites existing files (presumably the source has changed) # (implicitly does that for explicits; dodges existing tempfnames) # ...we now do this. # use strict; # my $chatty=1; my $debug=0; my $safety=0; my $regen=1; my $use_ch=0; my $cnt=0; # for 0, first used number will be 1 my $dir="."; my $tpt=""; use vars qw/$key $arg $infname $printit $append $ln $fn $method/; while( (defined($ARGV[0])) and (substr($ARGV[0],0,1) eq '-')){ $arg=shift(@ARGV); ($key,$arg)=unpack("xaa*",$arg); if($key eq 'D'){ # debug $debug ^= 1; next; # toggle it }elsif($key eq 'v'){ # verbose == chatty $chatty++; next; # ..in case levels are used }elsif($key eq 'r'){ # regeneration (overwrite of $regen ^= 1; next; # enumerate fnames) }elsif($key eq 'S'){ # safety $safety ^= 1; next; }elsif($key eq 'u'){ # use #CH as well as #extract $use_ch ^= 1; next; } # now switches that take arg if(defined($arg) and length($arg)){ $arg =~ s/^\s*\=\s*//; }else{ $arg=shift(@ARGV); } if($key eq 'd'){ # output directory $dir=$arg; }elsif($key eq 't'){ # filename template $tpt=$arg; }elsif($key eq 'n'){ # starting autoname number $cnt=0+$arg; }else{ print "unrecognized switch $key=$arg\n"; } } # # all that should be left on the commandline is infile, or a list of such # grab first infname and make a default count-template out of it # unless(length($tpt)){ $tpt = $ARGV[0]."XXXXXX"; } my $breaks=0; my $ln=$tpt; #$ln =~ s/^.*([X]+).*$/$1/e; substr($ln,0,index($ln,"XX"))=""; substr($ln,rindex($ln,"XX")+2)=""; my $xlen=length($ln); my $sprintemplate="%".$xlen.".0".$xlen."d"; # " syntax balance print "tpt=$tpt ln=$ln xlen=$xlen sprintemplate=$sprintemplate\n" if $debug; # # loop, to chew up a list of filenames # while(defined($infname=shift(@ARGV))){ print "bursting $infname to $dir\n" if $chatty; open(I,"<$infname") or die "can't open infile $infname\n"; $printit=$append=0; while(defined($ln=<I>)){ if( (index($ln,"#extract")==0) or ($use_ch and (index($ln,"#CH")==0)) ){ $breaks++; $append=0; chomp $ln; $ln =~ s/^\#\w+\s*//; # gotta be one of the two if(length($ln)){ $fn=$ln; if(index($fn,'>>')>-1){ $append=1; substr($fn,0,2)=""; } }else{ $fn=&gen_tmpname; } $fn="$dir/$fn"; $method = ($append ? '>>' : '>'); open(O,"$method$fn") or die "can't make/append output $fn\n"; if($append){ seek(O,0,2); } $printit=1; }else{ print O $ln if $printit; } } close(O) if $printit; close(I); } if($breaks){ print "$breaks splits\n" if $chatty; exit(0); }else{ warn "\aNO split-points found!!\n"; exit(1); } # # gen_tmpname. # # generate a new filename to global templating. if ! $regen, # increment until we find a name that doesn't yet exist. 'regen' # assumes we're regenerating a known process run's results, and # we get an increment out of the deal each time anyway. # sub gen_tmpname { my $first=1; my $tmpfname=""; # loop-skipped on first loop due to $first while($first or ($regen ? 0 : (-e $tmpfname))){ $cnt++; ($tmpfname=$tpt) =~ s/X+/sprintf($sprintemplate,$cnt)/e; $first=0; } return($tmpfname); } |
Grab a gzipped copy here |
| Syntax highlighting using Syntax::Highlight::Engine::Kate |