#!/usr/bin/perl -w
#
# fhtchars --crb3 24apr01
# file-flow filter to fix Micro$soft crap
#
# Usage: htchars infile outfile
#
# Reads infile line by line, strips Word-generated markup (span, div and
# font tags, attributes on <p>, <o:p> wrappers), maps the CP1252 "smart"
# punctuation bytes to plain ASCII, removes carriage returns, and writes
# the result to outfile with Unix line endings.
#
# NOTE(review): the copy of this script that was reviewed had every
# "<...>"-shaped token stripped out of it, so the <span>, <div>, <p ...>
# and <o:p> patterns below are reconstructed from the surviving comments
# and from the sibling patterns that did survive (font, </span>, </div>,
# </o:p>).  Confirm them against a pristine copy if one exists.
#
use strict;
use warnings;

# Read one logical line from the filehandle $in.
#
# The trailing newline and all carriage returns are removed.  If the line
# ends in the middle of a <span ...> tag (opened but not yet closed by
# '>'), following lines are appended until the tag is closed or the input
# runs out, so that the tag can be removed as a unit later.
#
# Returns the line, or undef at end of file.
sub read_logical_line {
    my ($in) = @_;

    my $line = <$in>;
    return unless defined $line;

    chomp $line;
    $line =~ tr/\r//d;    # dos -> unix eol

    # multiline span tag? pack it together.
    while ($line =~ /<span\b[^>]*\z/i) {
        my $more = <$in>;

        # Test for EOF *before* appending, so undef is never concatenated.
        last unless defined $more;

        chomp $more;
        $more =~ tr/\r//d;

        # The join point is inside the unclosed tag, so the separating
        # space is deleted together with the tag itself.
        $line .= " $more";
    }

    return $line;
}

# Remove or simplify the markup this filter targets in $line and return
# the cleaned string.
sub strip_word_markup {
    my ($line) = @_;

    $line =~ s{<span\b[^>]*>}{}gi;      # kill all span tags
    $line =~ s{</span>}{}gi;            # kill all /span tags
    $line =~ s{<p\b[^>]*>}{<p>}gi;      # chop out classes in <p>
    $line =~ s{</?div\b[^>]*>}{}gi;     # chop out divs (open and close)
    $line =~ s{</?font\b[^>]*>}{}gi;    # kill all font tags
    $line =~ s{<o:p>}{}gi;              # drop the opening <o:p> ...
    $line =~ s{</o:p>}{</p>}gi;         # ... and turn its close into </p>

    return $line;
}

# Replace the CP1252 punctuation bytes handled by this filter with ASCII
# equivalents and return the result.  See the table after __END__.
sub fix_cp1252_chars {
    my ($line) = @_;

    $line =~ tr/\x91\x92\x93\x94/''""/;    # fix the damn quotes
    $line =~ s/\x85/.../g;                 # 0x85 -> three dots
    $line =~ s/\x96/--/g;                  # 0x96 -> doubledash
    $line =~ s/\xBD/1\/2/g;                # 0xBD -> 1/2

    return $line;
}

die "Usage: htchars infile outfile\n"
    unless defined $ARGV[0] and defined $ARGV[1];

my ($infile, $outfile) = @ARGV;

# Three-arg open: the file names are taken literally, never parsed for a
# mode prefix or a pipe.
open(my $in, '<', $infile)
    or die "can't find infile $infile: $!\n";
open(my $out, '>', $outfile)
    or die "can't make outfile $outfile: $!\n";

while (defined(my $line = read_logical_line($in))) {
    $line = strip_word_markup($line);
    $line = fix_cp1252_chars($line);

    # The newline was chomped on input; put a plain \n back.
    print {$out} "$line\n"
        or die "can't write to outfile $outfile: $!\n";
}

close($in);

# Buffered write errors only surface at close, so check it.
close($out)
    or die "can't finish writing outfile $outfile: $!\n";

__END__

^R+80h = 0x92 -> apostrophe
^S+80h = 0x93 -> start quote
^T+80h = 0x94 -> end quote
^V+80h = 0x96 -> em-dash (doubledash)
         0xBD -> 1/2

kill all span tags.
kill all class= declarations within <p> tags.
kill all font tags
kill <o:p> crap... convert </o:p> to </p>.
locase all tags. (they are, but...)
kill all ^M.