#############################################################
# Program metapro.pl
#
# Grabs values from metadata records and builds counts for
# each value
#
# Usage: metapro.pl <directory> [element]
#
#   <directory>  directory whose *.xml files are scanned
#   [element]    optional name of the XML element whose content
#                is counted (see the NOTE(review) further down)
#
# Output goes to STDOUT: one "value (count)." line per distinct
# value, most frequent first, followed by a "---" line.
# Problems with individual files are reported on STDERR and the
# file is skipped.
#
# Author: Archie Warnock (warnock@awcubed.com) May 2004
#############################################################

use strict;
use warnings;
use Cwd;

my $datadir = $ARGV[0];    # Command-line argument: input directory

# Validate the argument BEFORE touching the file system, so a missing
# argument produces the usage text instead of a failed chdir.
if ( !defined $datadir || $datadir eq "" ) {
    print "\nUsage: metapro.pl <directory> [element]\n",
          "Writes to screen unless redirected to a file.\n";
    exit;
}

# NOTE(review): the pattern literal in the version of this script under
# review reads m#\s*(.*?)\s*#is -- the element tags that presumably
# surrounded the capture appear to have been lost.  The element name is
# therefore taken from an optional second argument.  When it is omitted
# the pattern is used exactly as found, which always captures an empty
# string; restore the intended hard-coded element name here if known.
my $element = defined $ARGV[1] ? $ARGV[1] : "";
my $pattern = $element ne ""
    ? qr#<\Q$element\E(?:\s[^>]*)?>\s*(.*?)\s*</\Q$element\E\s*>#is
    : qr#\s*(.*?)\s*#is;

# Remember where we started, then move into the input directory so the
# glob below yields names that can be opened directly.
my $current_dir = cwd();
chdir $datadir
    or die "Cannot change to directory '$datadir': $!\n";

my @filelist = glob('*.xml');    # all XML files in the input directory
my %key_counts;                  # value => number of files containing it

foreach my $file (@filelist) {

    # Open one file at a time; an unreadable file is reported and skipped
    # rather than silently counted as an empty value.
    my $fh;
    unless ( open $fh, '<', $file ) {
        warn "Skipping '$file': cannot open: $!\n";
        next;
    }

    # Slurp the whole file as one string.  $/ is localised so the
    # change cannot leak into any other reads.
    my $xml = do { local $/; <$fh> };
    close $fh;

    unless ( defined $xml ) {
        warn "Skipping '$file': read failed\n";
        next;
    }

    # Grab the contents of the (first) matching element.
    my ($key) = ( $xml =~ $pattern );
    unless ( defined $key ) {
        warn "Skipping '$file': no <$element> element found\n";
        next;
    }

    # Autovivification creates the entry on first sight of a value.
    $key_counts{$key}++;
}

# Switch back to the original directory.
chdir $current_dir
    or warn "Cannot return to directory '$current_dir': $!\n";

# Print out the results.  The sort block compares the counts
# numerically (descending), so the loop visits the keys in order of
# frequency; equal counts fall back to alphabetical order so the
# output is the same from run to run.
foreach my $sortkey (
    sort { $key_counts{$b} <=> $key_counts{$a} or $a cmp $b }
    keys %key_counts
  )
{
    print "$sortkey ($key_counts{$sortkey}).\n";
}
print "---\n";