#!/usr/bin/perl -w
#
# words. --crb3 02nov01
# list and count words used in a document.
# case sensitive.
# --crb3 03jun02: -i for case-insensitive treatment
#
# Usage: words [-i] [-a -n] infile [infile ...]
#   -i  fold every word to lower case before counting
#   -a  sort the report alphabetically (ignoring case)
#   -n  sort the report by descending count only
#   (default: descending count, ties broken alphabetically)
#
# Output: one "word,count" line per distinct word, followed by a summary
# line giving the number of distinct words and the case mode.
#
use strict;

my $chatty = 0;      # set nonzero to print each input file name as it is opened
my $sep    = ",";    # separator between word and count in the report
my $srt    = "b";    # sort mode: "a" alpha, "n" count, anything else = both
my $icase  = 0;      # true when -i was given

my $usage = "usage: words [-i] [-a -n] infile\n -i: case-insensitive, -a: alpha-sort, -n: count-sort\n";

# Split the command line into option switches and input file names.
# Only the first character after the dash is examined; "i" selects
# case-insensitive counting, any other character becomes the sort mode.
my @infiles;
while (defined(my $arg = shift(@ARGV))) {
    if (substr($arg, 0, 1) eq '-') {
        my $val = substr($arg, 1, 1);
        if ($val eq 'i') {
            $icase = 1;
        }
        else {
            $srt = $val;
        }
    }
    else {
        push(@infiles, $arg);
    }
}

# Checked after option parsing so that "words -i" with no file is also
# reported as a usage error instead of printing an empty report.
die $usage unless @infiles;

my %words;             # word => number of occurrences
my $wordscount = 0;    # number of distinct words seen

for my $infile (@infiles) {
    # Three-argument open with a lexical handle: the file name is never
    # interpreted as part of the open mode.
    open(my $in_fh, '<', $infile)
        or die "can't open infile $infile: $!\n";
    print "$infile...\n" if $chatty;

    while (defined(my $inline = <$in_fh>)) {
        chomp $inline;
        next if $inline =~ /^\s*$/;    # skip blank lines

        for my $word (split(' ', $inline)) {
            # Strip leading and trailing non-word characters (punctuation).
            $word =~ s/\W+$//;
            $word =~ s/^\W+//;
            next if $word eq "";

            $word = lc($word) if $icase;    # locase em all for case-insensitive

            $wordscount++ unless exists $words{$word};
            $words{$word}++;
        }
    }
    close $in_fh;
}

my @uniquewords;
if ($srt eq "a") {
    # Case-insensitive alphabetical; exact string order breaks ties so
    # that "The" and "the" always appear in the same relative order.
    @uniquewords = sort { lc($a) cmp lc($b) or $a cmp $b } keys %words;
}
elsif ($srt eq "n") {
    # Descending count only; order among equal counts is unspecified.
    @uniquewords = sort { $words{$b} <=> $words{$a} } keys %words;
}
else {
    # Descending count, then ascending word, stated explicitly rather than
    # relying on a stable sort over a pre-sorted key list.
    @uniquewords = sort { $words{$b} <=> $words{$a} or $a cmp $b } keys %words;
}

for my $word (@uniquewords) {
    print "$word$sep$words{$word}\n";
}

my $in = ($icase ? "in" : "");
print "$wordscount unique words, case-" . $in . "sensitive.\n";

__END__