\n"; # The emacs W3 browser seems to like a BR here $text = ""; $gloss = ""; %glosses = (); $_ = <>; do { do { if (/^[^\[\]]*\]/) { # gloss $gloss = $_; chomp; # print "$index has gloss in <$_>\n"; while( <> ) { chomp; goto check_poem_end if !length; $gloss .= "$_\n"; } } # print poem_file " " if /^\s/; $text .= $_; chomp; # print poem_file "$_
\n"; goto check_poem_end if !length; } while( $_ = <> ); check_poem_end: $_ = <>; # print "$index ok\n" if ( /^\n/ ); } until /^\n/; $_ = $text; if (length $gloss) { #print poem_file "\n\t
\n$gloss\n\t
\n";
# Copy the collected gloss text into $_ so the substitutions below can
# operate on the default variable; $gloss itself is left unchanged.
$_ = $gloss;
# Flatten the gloss onto a single line: every newline becomes a space.
s/\n/ /g;
# Rewrite each period-then-apostrophe (.') as apostrophe, period, space ('. ).
# NOTE(review): presumably this moves the period outside the quote so the
# split that follows sees a sentence boundary -- confirm against its pattern.
s/\.\'/\'\. /g;
@_ = split /(? length $a } keys %glosses)
{
# Handle one entry of %glosses: look the key up, turn it into a tolerant
# regex, and try to apply it to the current contents of $_.
$value = $glosses{$key};
# Report an empty gloss value on STDERR. There is no 'next' here, so
# processing of this key continues even after the report.
print STDERR "Error at poem $index. Empty gloss value for $key in $gloss\n"
if !length $value;
# Replace every whitespace character in the key with a regex fragment that
# matches a run of whitespace/newlines, optionally followed by one extra
# alphabetic word and a single whitespace character.
# NOTE(review): the key is used as regex source without being escaped, so
# any regex metacharacters it contains stay active -- confirm that gloss
# keys can never contain such characters.
$key =~ s/\s/(?:[\\s\\n]+)(?:[a-zA-Z]+?\\s)?/g;
# The optional-word part is deliberate overkill: it allows one word between
# the words of the regexp. This only occurs once in the whole
# text (poem 16, tuke [gude] keep).
# Disabled debugging output:
# print "gloss:$key=$value\n";
# Compile the pattern case-insensitively with multi-line anchors. From here
# on $key holds the compiled regex, so the STDERR message further down
# prints the pattern's stringified form rather than the original key text.
$key = qr/$key/im;
# Build $clean_value from the gloss value: every character other than an
# ASCII letter or underscore becomes '-', then leading hyphens are removed.
# NOTE(review): presumably a form safe to embed as an identifier -- confirm
# where $clean_value is consumed.
$clean_value = $value;
$clean_value =~ s/[^A-Za-z_]/-/g ;
$clean_value =~ s/^(-)+//;
# y/// takes a literal character list, not a character class, so this
# deletes '[' and ']' as well as single and double quotes from $value.
$value =~ y/[\'\"]//d;
# Apply the gloss to the text in $_ (all matches, case-insensitive,
# multi-line). The key must not be followed by an ASCII letter. If nothing
# matches, report the gloss as unattached on STDERR.
s/(^(?:(?:[^\<\n]*\)*[^\<\n]*?[^a-zA-Z])?)($key)(?![a-zA-Z])/$1$2<\/A>/gim or
print STDERR "<$key> is unattached gloss of value <$value> for poem $index.\n";
# An unattached-gloss report is expected (and harmless) for poem 712,
# where the gloss refers to the title.
}
}
# Put a newline in front of the text currently held in $_ ...
$_ = "\n$_";
# ... and remove one trailing record separator (a newline by default);
# chomp with no argument acts on $_.
chomp;
s/\n/\n/g; s/^
\n/
\n/gm; s/^((\s\s)+)(?!\s*$)/" " x length $1/mge; $text = $_; print poem_file $text; print poem_file "\n\t\t