// Translator from English text to Shavian script by Dave Coffin
//
// At present (Oct 2020) the American dictionary can be found at
// http://www.speech.cs.cmu.edu/cgi-bin/cmudict
//
// and the British dictionary is inside
// http://festvox.org/packed/festival/2.4/festlex_OALD.tar.gz
//
// and both are ignored by default in favor of the dictionary format
// used by shaw.py, with the added proviso that this file must be
// correctly sorted. For best results do:
//
// uconv -x Latin-ASCII input | sed -r "s/(&#8217;|&rsquo;|’)/'/g" | shaw dave.dict
//
// Part-of-speech tags are ignored because NLTK does not support C.

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

// Text that shaw_print() emits in place of an apostrophe.
#define APOS "\'"
// Square of x; the parsers score a match by the square of its length.
#define SQR(x) ((x)*(x))

#if !defined(uchar)
#define uchar unsigned char
#endif

// prefix[]  buffer that holds the whole dictionary as NUL-separated records;
//           parse_prefix() scans the '^' records starting at prefix+1
// suffix    parse_suffix() scans the '$' records starting at suffix+1
//           (within this file it is only assigned by load_custom())
// dict      first position searched by parse_root()
// dend      end of the region searched by parse_root()
// aflag     flag bits tested by parse_root(): with bit 0 set a pronunciation
//           that starts with '.' is rejected, with bit 1 set one that ends
//           with '.' is rejected
uchar prefix[2200000], *suffix, *dict=prefix, *dend, aflag;

// Compare two words that end at a space or NUL; ic != 0 ignores case and
// len != 0 limits the comparison to that many characters.
int wordcmp (const char *one, const char *two, int ic, int len);

#if defined(US) || defined(UK)

// These bypass the dictionary entirely.
// Each entry is "english shavian"; main() matches the English part without
// regard to case and prints whatever follows the space.
const uchar *except[] =
{ "to 𐑑", "for 𐑓", "of 𐑝", "the 𐑞", "and 𐑯", "a 𐑩", "an 𐑩𐑯", "no 𐑯𐑴" };

// Shavian letters are numbered 0 (peep) to 47 (yew) in Unicode order
// In the dictionary, 0x90 is added to match the last byte of their UTF-8 encoding.
//
// Ligature table.  Each row is { result, first, second }: when the loaders
// have just stored `first` followed by `second`, the pair is replaced by the
// single letter `result`.
const uchar lig[][3] = {
//{ 40,24,30 },		// ash + roar = are  wrong because marry != marring
  { 40,26,30 },		// odd + roar = are
  { 40,29,30 },		//  ah + roar = are
  { 40,35,30 },		//  up + roar = are
  { 41,36,30 },		// oak + roar = or
  { 41,39,30 },		// awe + roar = or
  { 42,33,30 },		// age + roar = air
  { 42,42,30 },		// air + roar = air
//{ 43,23,30 },		// egg + roar = err  wrong because ferry != furry
  { 43,43,30 },		// err + roar = err
  { 44,25,30 },		// ado + roar = array
  { 44,44,30 },		// array+roar = array
  { 45,22,30 },		//  if + roar = ear
  { 45,32,30 },		// eat + roar = ear
  { 45,46,30 },		// ian + roar = ear
  { 46,32,24 },		// eat + ash = ian
  { 46,32,25 },		// eat + ado = ian
  { 46,32,35 },		// eat + up  = ian
  { 47,8,37 },		// yea + ooze = yew
};
#endif

#if defined(US)

// CMUdict assumes middle-American pronunciation, with its
// Mary-marry-merry and father-bother mergers.
void load_usa()
{
  // The "ah" sound is absent in CMUdict, merged into "on".
  static const char cmu[45][4] = {
    "P","T","K","F","TH","S","SH","CH","Y","NG",
    "B","D","G","V","DH","Z","ZH","JH","W","HH",
    "L","M","IH","EH","AE","AH","AA","UH","AW","**",
    "R","N","IY","EY","AY","AH","OW","UW","OY","AO",
    "X","X","X","ER","ER" };

  FILE *fp;
  char c, sym[4], *word;
  int i;
 
  fp = fopen ("cmudict-0.7b", "r");
  if (!fp) {
    perror ("cmudict-0.7b");
    exit(1);
  }
  dend = dict;
  while (!feof(fp)) {
    if (!isalpha(c = fgetc(fp))) {
      while (!feof(fp) && fgetc(fp) != '\n');
      continue;
    }
    *dend++ = 0;
    word = dend;	// just for diagnostics
    do {
      *dend++ = c;
      c = fgetc(fp);
    } while (c != '(' && c != ' ');
    if (c == '(')
      while (fgetc(fp) != ')');
    *dend++ = ' ';
    while (1) {
      do c = fgetc(fp);
      while (c == ' ');
      if (c == '\n') break;
      for (i=0; i < 2 && isalpha(c); i++) {
	sym[i] = c;
	c = fgetc(fp);
      }
      if (c == '\n') ungetc (c, fp);
      sym[i] = 0;
      for (i=0; i < 45; i++)
	if (!strcmp (sym, cmu[i])) break;
      if (i == 45) {
        fprintf (stderr, "Unknown phoneme %s after %s\n", sym, word);
	exit(1);
      }
      if (i == 25 && c > '0') i = 35;	// change ado to up when stressed
      if (i == 32 && c < '1') i = 22;	// change eat to if when unstressed
      if (i == 43 && c < '1') i = 44;	// change err to array when unstressed
      *dend++ = 0x90 + i;
      for (i=0; i < sizeof lig/sizeof *lig; i++)
	if ( dend[-2] == lig[i][1]+0x90 &&
	     dend[-1] == lig[i][2]+0x90)
	  (--dend)[-1] = lig[i][0]+0x90;
    }
  }
  fclose (fp);
  *dend = 0;
}

#elif defined(UK)

// Classification helpers used by load_uk():
//   isvowel(x)  x is one of the letters a, e, i, o, u in either case
//   shvowel(x)  the low six bits of x are a token in 22..29 or 32..47
//   rhotic(x)   the low six bits of x are token 30 or a token in 40..45
// NOTE(review): isvowel shifts by more than 31 bits for bytes of 0x80 and
// above — confirm the headwords are plain ASCII.
#define isvowel(x) (((x)|32) >= 'a' && 0x104111 >> (((x)|32)-'a') & 1)
#define shvowel(x) ((0xff7800 >> (x & 63)/2) & 1)
#define rhotic(x) ((x & 63) == 30 || ((x & 63) > 39 && (x & 63) < 46))

// load_uk() reads "oald-0.4.out" from the current directory and appends one
// record per usable entry to the buffer at `dict`, leaving `dend` on the
// final NUL.  Each record is
//     NUL  word  ' '  phoneme bytes
// where a phoneme byte is 0x90 plus the Shavian letter number.  Entries
// whose headword contains '_', '`', '-' or a space are dropped, as is an
// entry identical to the one stored just before it.  Exits with status 1 if
// the file cannot be opened or holds an unknown phoneme symbol.
//
// While a word is being assembled each token in word[] carries two flags:
// bit 6 (64) marks a stressed syllable and bit 7 (128) marks an "r" that
// was inserted because the spelling has one.
//
// NOTE(review): several tests below read word[w-2] or word[w-3] without
// checking that the word has that many tokens — confirm against real data.
void load_uk()
{
  // The position of a symbol in this table is its token number.
  // 40 and 41 are the stress marks, 48 is the empty symbol and 49 ends a word.
  static const char oald[50][4] = {
    "p","t","k","f","th","s","sh","ch","y","ng",
    "b","d","g","v","dh","z","zh","jh","w","h",
    "l","m","i" ,"e" ,"a" ,"@" ,"o", "u" ,"au","aa",
    "r","n","ii","ei","ai","uh","ou","uu","oi","oo",
    "0","1","e@","@@","X" ,"X" ,"i@","u@", "" ,"po" };
  FILE *fp;
  uchar c, sym[4], *wptr, rmap[256], vc;
  uchar word[256], w, tok, syl, nsyl;
  int i;
 
  fp = fopen ("oald-0.4.out", "r");
  if (!fp) {
    perror ("oald-0.4.out");
    exit(1);
  }
  dend = dict;
  while (!feof(fp)) {
    if (fgetc(fp) != '(' || fgetc(fp) != '\"')
      goto nextline;
    *dend++ = 0;
    wptr = dend;
    memset (rmap, vc=0, sizeof rmap);
    while ((c = fgetc(fp)) != '\"') {
      if (feof(fp)) {			// file ended inside the headword
	dend = wptr-1;
	goto nextline;
      }
      if (isvowel(dend[-1]) && !isvowel(c))
	rmap[vc++] = (c|32) == 'r';	// note which vowel clusters end with 'r'
      *dend++ = c;
      if (c == '_' || c == '`' || c == '-' || c == ' ') {
	dend = wptr-1;			// cut all the French and Latin crap
	goto nextline;
      }
    }
    *dend++ = ' ';
    do c = fgetc(fp);
    while (c != '(' && !feof(fp));	// skip the part-of-speech tag
    if (feof(fp)) {			// file ended before the phonemes
      dend = wptr-1;
      goto nextline;
    }
    w = syl = nsyl = vc = 0;
    while (1) {
      for (i=0; i < 2; i++) {		// read a one- or two-byte symbol
	c = fgetc(fp);
	if (!isalnum(c) && c != '@') break;
	sym[i] = c;
      }
      sym[i] = 0;
      for (tok=0; tok < 50; tok++)	// convert it to a token
	if (!strcmp (sym, oald[tok])) break;
      if (c == '\n' || c == 0xff) {
	ungetc (c, fp);
	tok = 49;			// end-of-word token, usually generated by "pos" symbol
      }
      if (tok/2 == 20) {		// mark all phonemes in each syllable for stress
	nsyl++;
	// After truncation to a byte this sets bit 6 for "1" and nothing for "0".
	while (syl < w) word[syl++] |= 64 * (tok - 20);
	continue;
      }
      if (tok == 47) {
	word[w++] = 27;  tok = 25;	// u@ = wool + ado
      }
      if (tok == 48) continue;
      word[w++] = tok;
      if (w > 1 && shvowel(word[w-2]) && !shvowel(tok) && ++vc)
	if (rmap[vc-1] && !rhotic(word[w-2]) && !rhotic(tok)) {
	  word[w] = word[w-1];		// move the new token up one place,
	  w++;				// as a separate step from reading w
	  word[w-2] = 128 | 30;		// insert missing "r" after a vowel cluster
	}
      if (tok == 49 && w--) break;	// 49 got written, must back over it
      if (tok == 50) {
        fprintf (stderr, "Unknown phoneme %s after %s\n", sym, wptr);
	exit(1);
      }
    }
    // Drop a final inserted "r" that follows an unflagged token 32.
    if (word[w-2] == 32 && word[w-1] & 128) w--;
    // Final 15 after an inserted "r" after an unflagged 22: remove the "r".
    if (word[w-1] == 15 && word[w-2] & 128 && word[w-3] == 22)
      word[--w-1] = 15;
    if (nsyl == 1)			// one-syllable words are always stressed
      for (i=0; i < w; i++) word[i] |= 64;
    for (i=0; i < w; i++) {		// apply the stress rules, then strip the flags
      c = word[i];
      if (c == 32 || c == 35) c -= 10;	// change unstressed eat and up to if and ado
      if (c == 43) c++;			// change unstressed err to array
      word[i] = c & 63;
    }
    if (word[w-1] == 1 && word[w-2] == 31 && word[w-3] < 20) {
      word[w-2] = 25;
      word[w-1] = 31;			// insert schwa before final "nt"
      word[w++] = 1;
    }
    if (word[w-1] / 20 == 1 && word[w-1] / 2 % 5 == 0 && word[w-2] < 20) {
      word[w] = word[w-1];		// insert schwa before final l/m/r/n
      w++;
      word[w-2] = 25;
    }
    for (c=0; c < w; c++) {
      *dend++ = 0x90 + word[c];
      // Collapse the last two letters into a ligature where the table has one.
      for (i=0; i < sizeof lig/sizeof *lig; i++)
	if ( dend[-2] == lig[i][1]+0x90 &&
	     dend[-1] == lig[i][2]+0x90)
	  (--dend)[-1] = lig[i][0]+0x90;
    }
    // Terminate the new record before measuring it.  This byte is
    // overwritten with NUL later in any case, so the store is harmless.
    *dend = 0;
    // An identical previous record has the same length, so it would start
    // exactly one record length earlier.  The first record has no predecessor.
    if (wptr-1 > dict &&
	!strcasecmp ((char *) wptr, (char *) wptr-strlen((char *) wptr)-1))	// delete duplicates
      dend = wptr-1;
nextline:
    while (!feof(fp) && fgetc(fp) != '\n');
  }
  fclose (fp);
  *dend = 0;
}

#else

void load_custom (char *file)
{
  FILE *fp;
  char *wptr, *prev="";
  int c, line=1, space;
 
  fp = fopen (file, "r");
  if (!fp) {
    perror (file);
    exit(1);
  }
  dend = dict;
  *dend++ = space = 0;
  wptr = dend;
  dict = suffix = 0;
  while ((c = fgetc(fp)) != EOF) {
    if (c == '\r') continue;
    if (c == '\n') {
      *dend++ = 0;
      prev = wptr;
      wptr = dend;
      goto nxt;
    }
    if (c == 0xf0 && fgetc(fp) == 0x90 && fgetc(fp) == 0x91)
      c = fgetc(fp);
    else if (c == 0xef && fgetc(fp) == 0xb8 && fgetc(fp) == 0x80)
      continue;
    else if (c == ' ') {
      if (space++) {
	fprintf (stderr, "%s: Extra space, line %d discarded.\n", file, line);
	goto del;
      }
    } else if (!isalpha(c) && !strchr("^$\':._",c)) {
      fprintf (stderr, "%s: Illegal character \'%c\', line %d discarded.\n", file, c, line);
del:  while (fgetc(fp) != '\n');
      dend = wptr;
nxt:  space = 0;
      line++;
      continue;
    }
    if (!suffix && !dend[-1] && c != '^') suffix = dend-1;
    if (!dict && !dend[-1] && c != '^' && c != '$') dict = dend-1;
    if      (dend[-1] == 0xa6 && c == 0xa9) dend[-1] = 0xbe;	// merge_ia
    else if (dend[-1] == 0xa6 && c == 0xbc) dend[-1] = 0xbd;
    else *dend++ = c;
    if (c == ' ' && !wordcmp (wptr, prev, 0, 0)) {
      dend = wptr-1;
      *dend++ = '@';			// heteronym separator
      wptr = prev;
    }
  }
  fclose (fp);
  dend--;
}

#endif

// Compare the words at the start of `one` and `two`.
//
// A word ends at a space, at a '_' (which introduces a part-of-speech tag
// and is treated as a space) or at the terminating NUL.
//
//   ic   non-zero to ignore case (bit 5 of every character is forced on)
//   len  maximum number of characters to compare; 0 means no limit
//
// Returns 0 if the words are equal over the compared length, -1 if `one`
// sorts before `two`, and 1 if it sorts after.
int wordcmp (const char *one, const char *two, int ic, int len)
{
  char a, b;

  if (len == 0) len = INT_MAX;
  ic *= 32;				// ic = "ignore case"
  while (len--) {
    a = *one++;  b = *two++;
    if (a == '_') a = ' ';		// ignore POS tags
    if (b == '_') b = ' ';
    // Both strings ended together.  Without this test a case-sensitive
    // comparison would carry on reading past the two terminators.
    if (!(a | b)) return 0;
    a |= ic;  b |= ic;
    if ((a | b) == 32) return 0;	// both hit space or null
    if (a < b) return -1;
    if (a > b) return  1;
  }
  return 0;
}

// Look up the word `in` in the sorted records between `dict` and `dend`
// and write its pronunciation to `out`, which is left empty if the word is
// absent or rejected.
//
// The word is located by a case-insensitive binary search, the earliest
// record with that spelling is then found, and among the matching records
// one whose case matches exactly is preferred.  out[0] may hold a one-byte
// marker ahead of the pronunciation:
//   '*'  the chosen dictionary word starts with an upper-case letter
//   ','  the dictionary word contains '_' and aflag is 0
// With aflag bit 0 set a pronunciation that starts with '.' is rejected;
// with aflag bit 1 set one that ends with '.' is rejected.
void parse_root (uchar *in, uchar *out)
{
  char *low, *high, *mid, *cp;
  int comp;

  // Two NULs, so that strcat() below appends after a marker stored in out[0].
  out[0] = out[1] = 0;
#ifdef PURGE
  if (!aflag) return;
#endif
  low = dict;
  high = dend;
  do {
    mid = low + (high - low)/2;
    while (mid[-1]) mid--;		// back up to the start of a record
    if (mid == low) while (*mid++);	// already tried: take the next record
    if (mid >= high) return;		// range exhausted, word not found
    comp = wordcmp (in, mid, 1, 0);
    if (comp < 0) high = mid;
    if (comp > 0)  low = mid;
  } while (comp);
  while (1) {
    low = mid-1;			// look for earlier matches
    while (low[-1]) low--;
    if (wordcmp (in, low, 1, 0)) break;
    mid = low;
  }
  // Walk the matching records; stop at the first that also matches by case.
  for (cp=mid; !wordcmp (in, cp, 1, 0); cp++) {
    if (!wordcmp (in, cp, 0, 0)) goto exact;
    while (*++cp);
  }
  cp = mid;  exact:			// no exact match: use the first record
  if (isupper(*cp)) out[0] = '*';
  for ( ; *cp != ' '; cp++)		// advance to the space before the pronunciation
    if (*cp == '_' && !aflag) out[0] = ',';
  strcat (out, cp+1);
  if (aflag & 1 && cp[1] == '.' ||
      aflag & 2 && cp[strlen(cp)-1] == '.')
    out[0] = 0;
}

// Translate `in` either by finding it in the dictionary or by splitting it
// into a stem plus one of the '$' suffix records.
//
//   in   word to translate; every byte is OR-ed with 32 in place
//   out  receives the pronunciation; left empty when nothing matches
//   adj  added to the matched length before it is squared into the score
//
// Returns the score of the best parse found (0 if none).  A dictionary hit
// scores SQR(len+adj).  A stem+suffix parse scores the stem's own score
// plus SQR(suffix length + adj), subject to the adjustments below.  The stem
// is parsed by a recursive call, so suffixes can be stacked.
//
// NOTE(review): in[split-2] and in[len-2] are read without a length check
// in a few places — confirm that callers never pass a one-letter word.
int parse_suffix (uchar *in, uchar *out, int adj)
{
  int split, len, pass, score=0, best=0, i;
  uchar *suff, *tail, *cp, word[256], try[256], sflag;
  // Suffix spellings whose score is reduced by 9 below.
  static const char penal[][8] =
  { "bed","can","cat","cent","dance","ine","kin","one","pal","path","ster","tie","tied","ties","tying","wing","x" };

  parse_root (in, out);
  for (cp=in; *cp; cp++) *cp |= 32;
  len = cp-in;
  if (out[0]) return SQR(len+adj);
  // Each suffix record is "$spelling pronunciation"; suff points just past
  // the '$' inside the loop body.
  for (suff = suffix+1; *suff++ == '$'; suff += strlen(suff)+1) {
    // split is where the suffix spelling would begin inside `in`.
    split = len - (strchr(suff,' ') - (char *)suff);
    if (split < 2 || wordcmp (in+split, suff, 1, 0)) continue;
    if (aflag & 2 && suff[strlen(suff)-1] == '.') continue;
    // Spelling-based vetoes for particular stem-ending / suffix pairs.
    if (in[split] == in[split-1])
      if (len-split == 1 || strchr("eos",in[split])) continue;
    if (in[split-1] == in[split-2]) {
      if (!strcmp (in+split, "ess") && strchr("ln",in[split-1])) continue;
    } else {
      if (!strcmp (in+split, "ry") && strchr("aeiouf",in[split-1])) continue;
      if (!strcmp (in+split, "ha") && strchr("cpst",in[split-1])) continue;
      if (!strcmp (in+split, "th") && strchr("e",in[split-1])) continue;
      if (!strcmp (in+split, "d") && strchr("adeiou",in[split-1])) continue;
      if (!strcmp (in+split, "w") && strchr("aeo",in[split-1])) continue;
      if (!strcmp (in+split, "t") && strchr("aeioust",in[split-1])) continue;
      if (!strcmp (in+split, "k") && strchr("aceino",in[split-1])) continue;
      if (!strcmp (in+split, "r") && strchr("aeiou",in[split-1])) continue;
      if (!strcmp (in+split, "m") && strchr("eis",in[split-1])) continue;
      if (!strcmp (in+split, "z") && strchr("i",in[split-1])) continue;
      if (!strcmp (in+split, "n") && strchr("eio",in[split-1])) continue;
    }
    if (!strcmp (in+split, "es") && !strchr("hiosuxz",in[split-1])) continue;
    while (*suff++ != ' ');		// advance to the suffix pronunciation
    // Pass 0 tries a respelled stem, pass 1 the stem exactly as written.
    for (pass=0; pass < 2; pass++) {
      strcpy (word, in);
      // Cut the suffix off; the second NUL leaves room to append one letter.
      word[split] = word[split+1] = 0;
      if (pass) ;
      else if (in[split-1] == 'i' && !strchr("cfikmpsv",in[split]))
	word[split-1] = 'y';		// final i back to y
      else if (strchr("aeiouy\'",in[split]) && !strchr("aeio",in[split-1])) {
        if (in[split] == 'u' && (in[split+1]=='b' || in[split+1]=='p')) continue;
	if (in[split-1] == in[split-2] && !strchr("hsw",in[split-1]))
	  word[split-1] = 0;		// undo a doubled final consonant
	else if ((strchr("cdghlsuvz",in[split-1]) || in[split] == 'e' ||
		   strchr("aeiousy",in[split-2]))
		&& (!strchr("cg",in[split-1]) || !strchr("aou",in[split])))
	  word[split] = 'e';		// restore a dropped final e
	else continue;
      } else if (!strcmp(word+split-2, "dg"))
	word[split] = 'e';
      else continue;
      // Run the recursive parse with aflag bit 1 set, unless the suffix
      // starts with an apostrophe and no letter was appended to the stem.
      sflag = aflag;
      aflag &= ~2;
      if (in[split] != '\'' || word[split]) aflag |= 2;
      // The adj passed down is split minus the length of the respelled stem.
      if (score = parse_suffix (word, try, split-strlen(word))) {
#ifdef SCORE
        printf("%s %s %d + %d", word, in+split, score, SQR(len-split+adj));
#endif
	score += SQR(len-split+adj);
	if (in[split] == '\'' && !pass) score /= 2;
	if (!strcmp (in+split, "call")) score = 1;
	for (i=0; i < sizeof penal/sizeof *penal; i++)
	  if (!strcmp (in+split, penal[i])) score -= 9;
	if (score < 1) score = 1;
#ifdef SCORE
	printf(" = %d\n", score);
#endif
      }
      aflag = sflag;
      if (score <= best) continue;
      // New best parse: start from the stem's pronunciation, then join the
      // suffix pronunciation at cp, adjusting the seam where needed.
      best = score;
      strcpy (out, try);
      i = strlen(out);
      if (in[split-1] == 'e' && !strchr("aegiou",in[split-2]) &&	 // silent e speaks up
	  strchr("aou",in[split]) && strchr("dlmnprstu",in[split+1]) &&
	  strcmp(in+split,"arm") && strcmp(in+split,"out") && strcmp(in+split,"und") && strcmp(in+split,"up")) {
	if (strchr("\xa6\xb0",out[i-1])) i--;
	out[i++] = 0xa6;
	out[i] = 0;
      }
      cp = suff;
      tail = word+strlen(word);
      if (out[i-1] == *cp && strchr("\xa4\xaf",*cp) && strlen(cp) < 3)
	out[i-1] = 0;			// with -n, -l, -ly, one is enough
      if (out[i-1] == 0xa6 && strchr("\xa9\xbc",*cp)) // link ia and ear
	out[i-1] = *cp++ == 0xbc ? 0xbd : 0xbe;
      if (strchr("vw",tail[-1]) && out[i-1] == 0x93 && *cp > 0x97 && strcmp(in+split,"s"))
	out[i-1] += 10;			// change f to v in Slavic surnames
      if (!strcmp(tail-2,"le") && !strcmp(cp,"\xa6") &&
	  !strcmp(out+i-2,"\xa9\xa4"))
	   strcpy(out+i-2,"\xa4");	// drop the schwa when -le becomes -ly
      if (cp[1] != 0x99 && !strchr("\'\x9b\x9f",*cp) &&
	  !strcmp(out+i-2,"\xa9\xa5") && strchr("\x9e\x9f",out[i-3]))
	   strcpy(out+i-2,"\xa5");	// drop the schwa in -sm and -thm words
      if (strchr("\xa9\xad\xbe",out[i-2]) && out[i-1] == 0xa4 && !strcmp(cp,"\xa6\x91\xa6")) {
	strcpy(out+i-2, strchr("\x96\x97\xa0\xa1",out[i-3]) || out[i-2] == 0xbe ? "\xa6":"");
	strcat(out,"\xa8\xa4");		// strange "-(i)ality" rule
      }
      strcat (out, cp);
    }
  }
  // Word ends in a doubled letter other than a, e, i, o, s, u: also try it
  // with the last letter removed.  aflag bit 1 is set and not restored here.
  if (in[len-1] == in[len-2] && !strchr("aeiosu",in[len-2])) {
    aflag |= 2;
    strcpy (word, in);
    word[len-1] = 0;			// drop final double consonant
    score = parse_suffix (word, try, 0);
    if (best < score) {
      best = score;
      strcpy (out, try);
    }
  }
  // Clean-up passes over the finished pronunciation; cp is its end.
  cp = out + strlen(out);
  if (cp-out < 2) return best;
  // Pronunciation ends with ':' after a letter above 0x99.  Find the letter
  // before that one.  If the two clash, a 0xa9 is inserted between them.
  // If the earlier letter is below 0x98, the final letter is lowered by 10.
  // NOTE(review): this looks like the voicing rule for -s / -ed — confirm.
  if (cp[-1] == ':' && cp[-2] > 0x99) {
    for (i=-3; cp+i > out && cp[i] < 0x90; i--);
    if (cp[i] == cp[-2] || cp[i] == cp[-2]-10 ||
	cp[-2] == 0x9f && strchr("\x96\x97\xa0\xa1",cp[i])) {
      *++cp = 0;
      cp[-1] = ':';
      cp[-2] = cp[-3];
      cp[i=-3] = 0xa9;
    }
    if (cp[i] < 0x98) cp[-2] -= 10;
  }
  if (cp-out < 4) return best;
  if (!strcmp(cp-4,"\x92\xa9\xa4\xa6") && strchr("\xa6\xa9",cp[-5]))
       strcpy(cp-4,"\x92\xa4\xa6");	// "ically" is pronounced "icly"
  if (!strncmp(cp-4,"\x91\xb5",2) && strchr("\xa9\xb1\xba\xbc",cp[-2])) cp[-4] = 0x97; // tu becomes chu
  if (cp[-1] == 0xbe) cp++;		// ia palatization rules
  if (cp[-2] == 0xbe && strchr("\x91\x95\x9f",cp[-3]) && strchr("\x95\xa4\xaf",cp[-1])) {
    i = 0x96;
    if (cp[-3] == 0x91)
    { if (cp[-4] == 0x95) i = 0x97; }	// stia --> scha
    else if (strchr("\xaf",cp[-1]) && strchr("\xb0\xb1\xb4\xb5\xb7\xbb\xbf",cp[-4]))
      i = 0xa0;
    cp[-3] = i;
    cp[-2] = 0xa9;
  }
  return best;
}

// Translate `in`, allowing it to begin with one or more of the '^' prefix
// records held at the start of the prefix[] buffer.
//
//   in   word to translate (parse_suffix() lower-cases it in place)
//   out  receives the pronunciation of the best parse
//   ms   a prefix is only tried if its spelling is longer than ms letters
//
// The word is first tried without any prefix.  If that scores len*len, the
// score of a plain dictionary hit, the result is returned at once.
// Otherwise each matching prefix is split off and the rest is parsed
// recursively.  A prefixed parse scores SQR(prefix length) plus the score of
// the rest.  Returns the best score found, 0 if the word cannot be parsed.
// Sets the global aflag as a side effect.
int parse_prefix (char *in, char *out, int ms)
{
  int split, len, score, best, i;
  char *pref;
  uchar try[256];

  best = parse_suffix (in, out, 0);
  len = strlen(in);
  if (best == len*len) return best;
  // Each prefix record is "^spelling pronunciation"; pref points just past
  // the '^' inside the loop body.
  for (pref = prefix+1; *pref++ == '^'; pref += strlen(pref)+1) {
    split = strchr(pref,' ') - pref;	// length of the prefix spelling
    // Need more than ms letters, at least two letters left over, and a match.
    if (split <= ms || split > len-2 || wordcmp (in, pref, 1, split)) continue;
    // Never split between a 'u' and a following 'n'.
    if ((in[split-1]|32) == 'u' && (in[split]|32) == 'n') continue;
    // Never treat a lone 'z' before a vowel or 'y' as a prefix.
    if (split == 1 && (in[0]|32) == 'z' && strchr("aeiouy",in[1]|32)) continue;
    while (*pref++ != ' ');		// advance to the prefix pronunciation
    strcpy (try, pref);
    i = strlen(try);
    aflag = in[split-1] != '\'';	// bit 0 set unless the prefix ends in an apostrophe
    // With SCORE defined the braces group the statements below under this
    // `if`.  Without it only the first is conditional, which gives the same
    // result because a zero score can never beat `best`.
    if (score = parse_prefix (in+split, try+i, 1))
#ifdef SCORE
    { printf("%.*s %s %d + %d", split, in, in+split, SQR(split), score);
#endif
      score += SQR(split);
      if (split == 2 && !strncmp(in,"la",2)) score -= 4;
#ifdef SCORE
      printf(" = %d\n", score); }
#endif
    if (score <= best) continue;
    best = score;
    // try[i] is the first byte of the rest; close the gap over try[i-1]
    // when the prefix ends with the sound the rest begins with.
    if (try[i-1] == try[i] && strchr("\xa6\xa7",try[i-2]) && strchr("\xa4\xa5\xae\xaf",try[i]) ||
	!strncmp (try, "\xa5\xa9\x92\x92", 4) || !strncmp (try, "\xa5\xa9\x92*\x92", 5))
      do try[i-1] = try[i];	// undupe ill-, imm-, irr-, inn-, and macc-
      while (try[i++]);
    strcpy (out, try);
  }
  return best;
}

// Write the internal string `cp` to stdout as UTF-8.
//
// A namer dot is printed first when `cap` is non-zero or the string holds
// a '*', unless the string also holds a ','.  The marker characters ':',
// '.', ',' and '*' are not printed.  An apostrophe is printed as APOS.
// Other bytes below 0x90 are printed unchanged, and every byte from 0x90
// up is expanded to the four-byte sequence F0 90 91 xx of a Shavian letter.
// An empty string prints nothing after the optional dot.
void shaw_print (uchar *cp, int cap)
{
  if ((cap || strchr((char *) cp,'*')) && !strchr((char *) cp,',')) printf ("·");
  // Test the terminator before each byte, so an empty string is never
  // stepped over.
  for ( ; *cp; cp++) {
    if (strchr(":.,*",*cp)) continue;
    if (*cp == '\'') printf("%s", APOS);
    else if (*cp < 0x90) putchar(*cp);
    else printf ("%c%c%c%c",0xf0,0x90,0x91,*cp);
  }
}

// Load the dictionary, then copy stdin to stdout with every English word
// replaced by its Shavian spelling.
//
// A word is a run of letters, which may contain apostrophes after the first
// letter.  Words that cannot be translated are copied unchanged, as is all
// other input.  Text from '<' to the next '>' is passed through untouched,
// and words inside <script> and <style> elements are not translated.
// A word gets a namer dot when it starts with a capital letter and is not
// the first word after '.', ':', '?', '!' or a <div> tag.
//
// Build options: US / UK select the built-in loaders (otherwise argv[1]
// names the dictionary file), DUMP prints the dictionary and exits, PURGE
// prints each translated word next to its Shavian form.
int main (int argc, char **argv)
{
  FILE *fp = stdin;
  // w counts the letters collected so far.  sent is non-zero once a word has
  // been seen in the current sentence.  script is non-zero inside
  // <script>/<style>.
  uchar c, cap=0, w=0, sent=0, script=0, *cp;
  // pword holds the previous word.  It starts empty so that a leading "to"
  // does not compare against uninitialised memory.
  char word[256], copy[256], out[256], pword[256] = "";
  // Words before "to", each with the text printed ahead of it.
  const char befto[5][2][10] =
  { { "has","𐑕" },{ "have","𐑓" },{ "used","𐑕𐑑" },{ "unused","𐑕𐑑" },{ "supposed","𐑕𐑑" } };
  int i;

#if defined(US)
  load_usa();
#elif defined(UK)
  load_uk();
#else
  if (argc < 2) {
    fprintf (stderr, "Usage: %s dictionary-file\n", argv[0]);
    exit (1);
  }
  load_custom (argv[1]);
#endif
#ifdef DUMP
  for (cp = dict; cp < dend; cp++) {	// print the whole dictionary and exit
    if (*cp == 0) {
      shaw_print (cp+1, 0);
      putchar ('\n');
    }
  }
  exit(0);
#endif
  while (!feof(fp)) {
    c = fgetc(fp);			// end of file arrives here as 0xff
    if (isalpha(c) || (w && c == '\'')) {
      if (!w) cap = sent++ && isupper(c);
      word[w++] = c;
      continue;
    }
    if (strchr (".:?!", c)) sent = 0;
    if (w) {				// c ended a word: translate it
      word[w] = 0;
      if (script) goto pass;
#if defined(US) || defined(UK)
      for (i=0; i < sizeof except/sizeof *except; i++)
	if (!wordcmp (word, except[i], 1, 0)) {
	  printf ("%s", except[i]+w+1);
	  goto end;
	}
      if (word[0] == 'I' && !isalpha(word[1])) cap = 0;
#endif
      if (!strcasecmp(word,"to"))
	for (i=0; i < 5; i++)
	  if (!strcasecmp(pword,befto[i][0]))
	    printf ("@%s ", befto[i][1]);
      aflag = 0;
      strcpy (copy, word);		// the parsers modify their input
      if (parse_prefix (copy, out, 0)) {
	// Drop a final 0x9a that directly follows 0xa5.
        if ((i = strlen(out)) > 2 && !strcmp(out+i-2,"\xa5\x9a")) out[i-1] = 0;
	// Replace the first letter of this six-letter ending with 0xa6.
	if (cp = strstr(out,"\xb2\x9f\xb1\x96\xa9\xaf")) *cp = 0xa6;
#ifdef PURGE
	for (i=0; i < 2; i++)
	{ printf ("\n%s ", word);
	  shaw_print (out, 0);
	  word[0] &= -33; }
#else
	shaw_print (out, cap);
#endif
      } else pass: printf ("%s", word);
      memcpy (pword, word, 256);
    }
end:w=0;
    if (c != 0xff) putchar(c);
    if (c == '<') {			// pass through HTML/XML tags and Javascript
      i=0; do {
	c = fgetc(fp);
	if (feof(fp)) break;		// unterminated tag: do not emit the EOF byte
	putchar (c);
	if (i < 256) word[i++] = c;
      } while (c != '>');
      if (!strncasecmp (word, "div", 3)) sent = 0;
      if (!strncasecmp (word, "script", 6)) script = 1;
      if (!strncasecmp (word,"/script", 7)) script = 0;
      if (!strncasecmp (word, "style", 5)) script = 1;
      if (!strncasecmp (word,"/style", 6)) script = 0;
    }
  }
  return 0;
}