#!/usr/bin/perl
#
# Reads identifier lines on STDIN and, for every line that matches the
# pattern  _<file_id>.<sent_id> ... .<parc_id>  prints a header followed by
# element number <sent_id> of the bracketed trees found in
# <wsjDir>/wsj_<file_id>.mrg.  Lines that do not match are ignored.
#
# NOTE(review): the placeholder names in the usage text (<parcFiles>,
# <wsjDir>) were reconstructed; the original message had lost them.

use strict;
use warnings;

my $usage =
      "usage: cat <parcFiles> | $0 <wsjDir>\n"
    . "  where <wsjDir> contains the files wsj_2300.mrg, wsj_2301.mrg, ...,\n"
    . "  wsj_2399.mrg from section 23 of the UPenn WSJ Treebank-II\n"
    . "  (LDC catalog no.: LDC95T7), and <parcFiles> are the PARC 700\n"
    . "  dependency bank files.\n";

my $wsjDir = shift @ARGV;
die $usage unless defined $wsjDir && length $wsjDir;

# Cache of already-split treebank files, keyed by path, so a file that is
# referenced by many input lines is read and split only once.  A value of
# undef records a file that could not be read.
my %trees_for;

# Slurp one .mrg file and split it into a list of trees.
# Returns an array ref, or undef (after warning) if the file is unreadable.
sub load_trees {
    my ($path) = @_;

    my $fh;
    if (!open $fh, '<', $path) {
        warn "cannot open $path: $!\n";
        return;
    }

    # Read the whole file regardless of its size.
    my $data = do { local $/; <$fh> };
    close $fh;
    $data = '' unless defined $data;

    # Split in front of every "( (X" without consuming it.  If the file has
    # any text before the first tree, that text becomes element 0 and the
    # first tree is element 1 -- presumably why sentence ids are used as
    # indexes unchanged; verify against the data.
    my @trees = split /(?=\( \([A-Z])/, $data;
    return \@trees;
}

while (my $line = <STDIN>) {
    chomp $line;

    my ($file_id, $sent_id, $parc_id) = $line =~ /\_(\d+)\.(\d+).*\.(\d+)/;
    next unless defined $parc_id;

    my $path = "$wsjDir/wsj_$file_id.mrg";
    if (!exists $trees_for{$path}) {
        $trees_for{$path} = load_trees($path);
    }

    # Skip sentences whose treebank file could not be read instead of
    # printing stale data from a previously loaded file.
    my $trees = $trees_for{$path};
    next unless defined $trees;

    print "parc \#$parc_id\, wsj file wsj_$file_id.mrg, sentence \#$sent_id: \n\n";

    my $tree = $trees->[$sent_id];
    if (!defined $tree) {
        warn "no sentence \#$sent_id in $path\n";
        $tree = '';
    }
    print $tree, "\n\n";
}