#!/usr/local/bin/perl5 # # This program reads the log file from "generateLog.pl" and compute the NIST/Bleu scores # using bootstapping # # By Joy, joy@cs.cmu.edu # # Apr 27, 2005 [v11.2] # Allow to specify ConfidenceLevel, default = 0.95 (95%) # # Jan 21, 2005 # Bug fix, when creat sampledInfo, index should start from 1 # # Oct 25, 2004 # Updated percentile calculation based on http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm # # Oct 25, 2004 # Fixed the bug reported by David Chiang(dchiang@umiacs.umd.edu) regarding the # sort function # # Oct 18, 2004, # Corrected the confusing part of using n-gram_size and report_n-gram_size # # May27, 2003 # in accordance to generateLog-v11.pl, no "shortestRefLen" needed # # Usage: # perl5 bootstrapSingle.pl [ResampleTimes, default=1000] [Confidence, default=0.95] < logfile # # print STDERR "\nUsage:"; print STDERR "\n\tperl5 bootstrapSingle.pl [ResampleTimes, default=1000] [ConfidenceLevel, default=0.95] < logfile"; print STDERR "\n\n"; use lib "/afs/cs/user/joy/sharedLib"; #specify where the Statistics::Distributions library can be found use Statistics::Distributions; #adjustable parameter my $B=1000; if($#ARGV==0){ $B = $ARGV[0]; } my $confidenceLevel = 0.95; if($#ARGV==1){ $confidenceLevel = $ARGV[1]; } #global variables @allInfo=(); #allInfo[0]=docId__segId #allInfo[1]=ref0_len.... #allInfo[refNum]=ref_n_len #depricated #allInfo[refNum+1]=shortest_len #allInfo[refNum+1]=closest_len #allInfo[refNum+2+(n-1)*3]=n-gram count in test #allInfo[refNum+2+(n-1)*3+1]=n-gram match in test #allInfo[refNum+2+(n-1)*3+2]=n-gram info gain in test $maxNgramSize = 5; $refNum = 4; $segSize=0; $report_NIST_ngram_size=5; $report_Bleu_ngram_size=4; while(){ #NIST_ngram_size=5 #Bleu_ngram_size=4 if(/^Max_Ngram_size=([0-9]+)/){ $maxNgramSize = $1; $_=; /^Report_NIST_ngram_size=([0-9]+)/; $report_NIST_ngram_size = $1; $_=; /^Report_Bleu_ngram_size=([0-9]+)/; $report_Bleu_ngram_size = $1; print STDERR "Max_N=$maxNgramSize\n"; print STDERR "Report_NIST_ngram_size=$report_NIST_ngram_size Report_Bleu_ngram_size=$report_Bleu_ngram_size\n"; } if(/^RefNumber=([0-9]+)/){ $refNum=$1; print STDERR "RefNumber=$refNum\n"; } if(/^GlobalSegId=([0-9]+)/){ #information for one segment $segId=scalar $1; #GlobalSegId=79 #DocId=chtb_076 #SegId=6 #RefLen: 28 25 28 22 #ClosestRefLen 22 #1-gram: 21 15 115.94 #2-gram: 20 6 29.02 #3-gram: 19 4 8.15 #4-gram: 18 2 4.46 #5-gram: 17 1 0.00 #store information $_=; #DocId=chtb_076 s/^DocId=//; s/\x0A+//; s/\x0D+//; my $docId=$_; $_=; #SegId=6 s/^SegId=//; s/\x0A+//; s/\x0D+//; my $segIdInDoc=$_; $allInfo[$segId][0]=$docId."__".$segIdInDoc; #print "$segId: $docId--$segIdInDoc\n"; #print $allInfo[$segId][0]," in $segId\n"; $_=; #RefLen: 28 25 28 22 s/\x0A+//; s/\x0D+//; s/^RefLen:\x20+//; s/\x20+\Z//; my @refLens=split(/\x20/,$_); for(my $i=0;$i<$refNum;$i++){ $allInfo[$segId][$i+1]=$refLens[$i]; } $_=; #ClosestRefLen 24 s/^ClosestRefLen\x20+//; s/\x0A+//; s/\x0D+//; my $closestLen=$_; $allInfo[$segId][$refNum+1]=$closestLen; for(my $i=1;$i<=$maxNgramSize;$i++){ $_=; #1-gram: 21 15 115.94 s/^[0-9]+-gram:\x20+//; s/\x0A+//; s/\x0D+//; my @values = split(/\x20/,$_); #allInfo[refNum+2+(n-1)*3]=n-gram count in test $allInfo[$segId][$refNum+2+($i-1)*3]=$values[0]; #print "SegId=$segId I=$i Index=",$refNum+3+($i-1)*3,"\n"; #allInfo[refNum+2+(n-1)*3+1]=n-gram match in test $allInfo[$segId][$refNum+2+($i-1)*3+1]=$values[1]; #print "SegId=$segId I=$i Index=",$refNum+3+($i-1)*3+1,"\n"; #allInfo[refNum+2+(n-1)*3+2]=n-gram info gain in test $allInfo[$segId][$refNum+2+($i-1)*3+2]=$values[2]; #print "SegId=$segId I=$i Index=",$refNum+3+($i-1)*3+2,"\n"; } } } $segSize = $segId; print "Total $segSize segments read.\n"; my @nist_list=(); my @bleu_list=(); my @m_bleu_list=(); print "Create $B Samples\n"; print "Critical value = ",Statistics::Distributions::tdistr ($B-1, (1-$confidenceLevel)/2)," for confidence level of "; printf "%.2f\%\n",$confidenceLevel*100; for(my $iteration=0;$iteration<$B;$iteration++){ #print STDERR "Sampling for $iteration trial...\n"; #sampling @sampledInfo = (); for(my $i=1;$i<=$segSize;$i++){ #generate $segSize random numbers my $randomNumber = rand(); #print $randomNumber,"\n"; $randomNumber=int($randomNumber*$segSize+1); #should range between (1~segSize) #print $randomNumber,"\n"; $sampledInfo[$i]=$allInfo[$randomNumber]; } #calculate the NIST/Bleu for this sample #accumulate info #allInfo[0]=docId__segId #allInfo[1]=ref0_len.... #allInfo[refNum]=ref_n_len #depricated #allInfo[refNum+1]=shortest_len #allInfo[refNum+1]=closest_len #allInfo[refNum+2+(n-1)*3]=n-gram count in test #allInfo[refNum+2+(n-1)*3+1]=n-gram match in test #allInfo[refNum+2+(n-1)*3+2]=n-gram info gain in test #------------------------- #for length penalty my $sys_len=0; my $ref_len_all=0; my $closet_ref_len = 0; for(my $i=1;$i<=$segSize;$i++){ for(my $j=1;$j<=$refNum;$j++){ $ref_len_all += $sampledInfo[$i][$j]; } $sys_len += $sampledInfo[$i][$refNum+2]; #unigram count $closet_ref_len += $sampledInfo[$i][$refNum+1]; #closest ref len } #print "SysLen=$sys_len refLen=",$ref_len_all/$refNum,"\n"; my $nist_lenPen = nist_length_penalty($sys_len/($ref_len_all/$refNum) ); #print "NistLenPen = $nist_lenPen\n"; my $bleu_lenPen = bleu_length_penalty($closet_ref_len / $sys_len); #print "ClosestRefLen = $closet_ref_len\n"; #print "BleuLenPen = $bleu_lenPen\n"; #-------------------------- #for NIST scores @ngramInfoGain=(); @ngramCountInSys=(); for(my $i=1;$i<=$segSize;$i++){ for(my $k=1;$k<=$report_NIST_ngram_size;$k++){ $ngramCountInSys[$k]+=$sampledInfo[$i][$refNum+2+($k-1)*3]; $ngramInfoGain[$k]+=$sampledInfo[$i][$refNum+2+($k-1)*3+2]; } } #calculate NIST score my $nistScore=0.0; for(my $k=1;$k<=$report_NIST_ngram_size;$k++){ $nistScore+=$ngramInfoGain[$k]/$ngramCountInSys[$k]; } $nistScore*=$nist_lenPen; #print "NIST=$nistScore\n"; $nist_list[$iteration] = $nistScore; #print "Live $iteration --> ",$nist_list[$iteration],"\n"; #-------------------------------- #for bleu/ M-Bleu #n-gram precision @ngramCountInSys=(); @ngramMatched=(); @ngramPrec=(); for(my $i=1;$i<=$segSize;$i++){ for(my $k=1;$k<=$report_Bleu_ngram_size;$k++){ $ngramCountInSys[$k]+=$sampledInfo[$i][$refNum+2+($k-1)*3]; $ngramMatched[$k]+=$sampledInfo[$i][$refNum+2+($k-1)*3+1]; } } for(my $k=1;$k<=$report_Bleu_ngram_size;$k++){ #print "$k-gram info=",$ngramInfoGain[$k]," outof ",$ngramCountInSys[$k],"\n"; #print "Precision=",$ngramMatched[$k]/$ngramCountInSys[$k],"\n"; $ngramPrec[$k]=$ngramMatched[$k]/$ngramCountInSys[$k]; } #calculate Bleu score my $bleu_score = 0.0; my $nullify = 0; $nullify = 1 if @ngramPrec <= $report_Bleu_ngram_size; for(my $k=1;$k<=$report_Bleu_ngram_size;$k++){ if ($ngramPrec[$k]) { $bleu_score += 1/$report_Bleu_ngram_size * log($ngramPrec[$k]); } else { $nullify = 1; } } if ($nullify) { $bleu_score = 0; } else { $bleu_score = exp($bleu_score); } $bleu_score*=$bleu_lenPen; #print "Bleu=$bleu_score\n"; #print "Live $iteration --> $bleu_score\n"; $bleu_list[$iteration]=$bleu_score; #calculate Modified Bleu score my $modified_bleu = 0.0; for(my $k=1;$k<=$report_Bleu_ngram_size;$k++){ $modified_bleu += 1/$report_Bleu_ngram_size * $ngramPrec[$k]; } $modified_bleu*=$bleu_lenPen; #print "Modified Bleu=$modified_bleu\n"; $m_bleu_list[$iteration]=$modified_bleu; #print "\n"; } print "\nNIST Metric\n"; findConfidenceInterval(\@nist_list); calcMean_Var(\@nist_list); print "\n"; print "Bleu Score\n"; findConfidenceInterval(\@bleu_list); calcMean_Var(\@bleu_list); print "\n"; print "M-Bleu Score\n"; findConfidenceInterval(\@m_bleu_list); calcMean_Var(\@m_bleu_list); print "\n"; sub nist_length_penalty { my ($ratio) = @_; return 1 if $ratio >= 1; return 0 if $ratio <= 0; my $ratio_x = 1.5; my $score_x = 0.5; my $beta = -log($score_x)/log($ratio_x)/log($ratio_x); return exp (-$beta*log($ratio)*log($ratio)); } sub bleu_length_penalty{ my $r2hLen = shift; # penalize only if hypothesis is less than closest reference in length return 1 if $r2hLen < 1.0; return exp(1 - $r2hLen); } sub findConfidenceInterval{ my @score_list = sort {$a <=> $b} @{$_[0]}; #my $lowRange = 0.5*($score_list[int($B*0.025)] + $score_list[int($B*0.025) - 1]); #my $highRange = 0.5 * ($score_list[int($B*0.975)] + $score_list[int($B*0.975) - 1]); #base on http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm my $lowRange = calcPercentile( (1.0-$confidenceLevel)/2, \@score_list); my $highRange = calcPercentile( 1.0-(1.0-$confidenceLevel)/2, \@score_list); printf "Median=%.4f\t",calcPercentile(0.5, \@score_list); printf "nonPara interval: "; printf "[%.4f,%.4f] \n",$lowRange, $highRange; } sub calcMean_Var{ my @score_list = sort {$a<=>$b} @{$_[0]}; my $n = $#score_list + 1; my $df = $n-1; if($n<=1){ return; } my $sum_of_sqr = 0; my $sum_of_value = 0; for(my $j=0;$j<$n;$j++){ $theValue = $score_list[$j]; $sum_of_sqr += $theValue * $theValue; $sum_of_value += $theValue; } my $var = ($n*$sum_of_sqr - $sum_of_value*$sum_of_value) / ($n * ($n-1)); my $mean = $sum_of_value/$n; my $stdev = sqrt($var); my $criticalTvalue = Statistics::Distributions::tdistr ($df, (1.0-$confidenceLevel)/2); #for 95% confidence my $marginalError = $criticalTvalue * $stdev; printf "Mean =%.4f\t", $mean; printf "t-interval: [%.4f, %.4f]\t", $mean-$marginalError,$mean+$marginalError; printf "Var=%.8f\t", $var; printf "STDEV=%.4f\t", $stdev; printf "RSD=%.2f\%\n", 100*sqrt($var)/$mean; } sub calcPercentile{ my $p=$_[0]; my @ranked_score_list = @{$_[1]}; my $N=$#ranked_score_list+1; my $k=int( ($N+1)*$p); my $d=($N+1)*$p-$k; if($k==0){ return $ranked_score_list[0]; } if($k==$N){ return $ranked_score_list[$N-1]; } $k--; #to turn it into the array index return $ranked_score_list[$k]+$d*($ranked_score_list[$k+1]-$ranked_score_list[$k]); }