#!/usr/local/bin/perl5
#
# This program is used to create the SGML file from a plain text file
# (the hypothesis generated by MT systems) using the template of the
# source SGML.
#
# Assuming the order of the source SGML is the same as the hypothesis file:
# the N-th <seg> element of the source receives the N-th line of the
# hypothesis file.
#
# Note: this is all for NIST SGML version 09
#       Use convert8to9.perl to convert from SGML version 8 to v9
#
# By Joy, joy@cs.cmu.edu
# Mar 06, 2003
#
# Modified 08/19/2005
#   Adding one argument to allow specifying the tgt language
#
# Usage:
#   perl5 generateSGMLfromText.perl textFile sysId srcv.09.sgm [tgtLanguage] > hyp.sgm
#
# Arguments:
#   textFile     plain-text hypothesis file, one translated segment per line
#   sysId        system identifier written into every <DOC ...> tag
#   src.sgm      source SGML file used as the template
#   tgtLanguage  optional target language name (defaults to "English")
#
# The converted SGML is written to STDOUT, diagnostics go to STDERR.
# The script exits with a non-zero status on bad usage, on an unreadable
# input file, or when a <seg> line cannot be parsed.
#
# NOTE(review): the tag-matching patterns below were rebuilt from the
# comments that survived in the script and from the NIST MT v09 SGML layout
# (<srcset ...>, <DOC docid="...">, <seg id=N> ... </seg>). Confirm them
# against a real src.sgm before relying on the output.

use strict;
use warnings;

# check parameters
if ( @ARGV < 3 ) {
    print STDERR "\n----------------------------------------------";
    print STDERR "\nUsage:";
    print STDERR "\n\tperl5 generateSGMLfromText.perl textFile sysId src.sgm [tgtLanguage] > hyp.sgm";
    print STDERR "\n----------------------------------------------\n";
    exit 1;
}

my ( $hyp_path, $sys_id, $src_path ) = @ARGV[ 0 .. 2 ];
my $tgt_lang = @ARGV >= 4 ? $ARGV[3] : "English";

# Three-argument open with lexical handles; a missing input must be fatal,
# otherwise the script would silently emit an empty or untranslated file.
open my $hyp_fh, '<', $hyp_path
    or die "Cannot open hypothesis file '$hyp_path': $!\n";
open my $src_fh, '<', $src_path
    or die "Cannot open source SGML file '$src_path': $!\n";

# The hypothesis file is read completely so segments can be looked up by index.
my @hyp_segs = <$hyp_fh>;
close $hyp_fh;
print STDERR "Hypothesis file has ", scalar(@hyp_segs), " lines.\n";

my $src_lines = 0;    # number of <seg> elements replaced so far

while ( my $line = <$src_fh> ) {

    # version 9:
    #   src: <srcset setid="..." srclang="...">
    #   tst: <tstset setid="..." srclang="..." trglang="...">
    # Existing attributes are kept and the target language is appended.
    $line =~ s{<srcset\b([^>]*)>}{<tstset$1 trglang="$tgt_lang">}i;
    $line =~ s{</srcset>}{</tstset>}i;

    # Doc tag, add system id.
    # If the tag already carries a sysid, overwrite its value; otherwise
    # append a sysid attribute. The original spelling of the tag is kept.
    unless ( $line =~ s{(<doc\b[^>]*?\bsysid=)"[^"]*"}{$1"$sys_id"}i ) {
        $line =~ s{(<doc\b[^>]*)>}{$1 sysid="$sys_id">}i;
    }

    # replacing the source text with the hypothesis
    if ( $line =~ /<seg\b/i ) {

        # Fall back to an empty translation when the hypothesis file is
        # shorter than the source; the mismatch is reported after the loop.
        my $this_trans = $src_lines < @hyp_segs ? $hyp_segs[$src_lines] : '';
        $this_trans =~ s/[\r\n]+\z//;

        # Handles both <seg id=N> ... </seg> and bare <seg> ... </seg>;
        # the opening tag, including any id attribute, is kept as is.
        if ( $line =~ s{(<seg\b[^>]*>).*(</seg>)}{$1 $this_trans $2}i ) {
            $src_lines++;
        }
        else {
            # A <seg> that does not close on the same line is not supported.
            print STDERR "Unknown format for segmentent.\n";
            exit 1;
        }
    }

    print $line;
}
close $src_fh;

print STDERR "$src_lines in the src SGML file\n";

# A count mismatch means the two files are not aligned segment by segment.
if ( $src_lines != @hyp_segs ) {
    print STDERR "Warning: source has $src_lines segments but hypothesis file has ",
        scalar(@hyp_segs), " lines.\n";
}