# This script takes as input an XML file (e.g. lexsub_trial.xml) and produces
# a second file (same name with .fixed appended) in which the space
# between the number and the ';' in the representation of extended special
# characters is removed, e.g. "&#233 ;" becomes "&#233;".
#
# Usage: perl <this-script> input.xml
#
# We thank Richard Wicentowski and his student for spotting
# this problem in the original corpus.

use strict;
use warnings;

my $in_path = $ARGV[0];
die "Usage: $0 <input.xml>\n"
    unless defined $in_path && length $in_path;

my $out_path = "$in_path.fixed";

# Three-argument open with lexical handles: the file name is never
# interpreted as a mode or a pipe, and a failure stops the script instead of
# silently producing an empty .fixed file.
open my $in, '<', $in_path
    or die "Cannot open '$in_path' for reading: $!\n";
open my $out, '>', $out_path
    or die "Cannot open '$out_path' for writing: $!\n";

while (my $line = <$in>) {
    # Rewrite "&#NNN ;" as "&#NNN;" (exactly one space before the ';').
    $line =~ s/&#(\d+) ;/&#$1;/g;

    print {$out} $line
        or die "Cannot write to '$out_path': $!\n";
}

close $in
    or die "Cannot close '$in_path': $!\n";

# Buffered write errors only surface at close, so check it.
close $out
    or die "Cannot close '$out_path': $!\n";