Sunday, December 27, 2015

Dictionary expander written in C (not mine)

/****************************************************************************/
/* (C) Copyright 1992 Universidade Estadual de Campinas (UNICAMP)           */
/*                    Campinas, SP, Brazil                                  */
/*                                                                          */
/* Authors:                                                                 */
/*                                                                          */
/*   Tomasz Kowaltowski  - CS Dept, UNICAMP <tomasz@dcc.unicamp.ansp.br>    */
/*   Claudio L. Lucchesi - CS Dept, UNICAMP <lucchesi@dcc.unicamp.ansp.br>  */
/*   Jorge Stolfi        - DEC Systems Research Center <stolfi@src.dec.com> */
/*                                                                          */
/* This file can be freely distributed, modified, and used for any          */
/*   non-commercial purpose, provided that this copyright and authorship    */
/*   notice be included in any copy or derived version of this file.        */
/*                                                                          */
/* DISCLAIMER: This software is offered ``as is'', without any guarantee    */
/*   as to fitness for any particular purpose.  Neither the copyright       */
/*   holder nor the authors or their employers can be held responsible for  */
/*   any damages that may result from its use.                              */
/****************************************************************************/

/* Last modified on Sun Aug  2 01:57:14 PDT 1992 by stolfi                  */

/* Expands a shrunken wordlist.
 *
 * Usage: expanddict < foo.shrunk > foo
 *
 * This program assumes each line of stdin contains a word,
 * consisting of a "prefix length", a "suffix" and a newline.
 * The prefix length is an integer in [0..35], encoded as a single
 * byte [0-9A-Z], which represents that many characters from
 * the beginning of the previous word. (In particular, the
 * prefix length of the first word is zero.)  The suffix
 * is simply the rest of the word.
 *
 * For instance, the word list
 *
 *    baby
 *    back
 *    backs
 *    backstage
 *    backup
 *    bath
 *    cobalt
 *    cobra
 *
 * would be encoded as
 *
 *    0baby
 *    2ck
 *    4s
 *    5tage
 *    4up
 *    2th
 *    0cobalt
 *    3ra
 *
 */

#include <stdio.h>

#define MAXLENGTH 1000

/* Decodes one prefix-length byte.
 *
 * '0'..'9' map to 0..9 and 'A'..'Z' map to 10..35 (this relies on the
 * letters being contiguous in the execution character set, as in ASCII).
 * Returns -1 for any other value, including EOF and '\n'.
 */
static int decode_prefix_length(int c)
{
  if ((c >= '0') && (c <= '9'))
    return c - '0';
  if ((c >= 'A') && (c <= 'Z'))
    return c - 'A' + 10;
  return -1;
}

/* Expands a prefix-compressed word list read from stdin onto stdout.
 *
 * Each input line is a prefix-length byte followed by a suffix and a
 * newline; the output word is the first `n` bytes of the previous word
 * followed by the suffix.  Statistics are printed to stderr at the end.
 *
 * Returns 0 on success.  Returns 1, after printing a "** ..." diagnostic
 * to stderr, when the input is malformed (bad prefix byte, prefix longer
 * than the previous word, word longer than MAXLENGTH, input ending in
 * the middle of a line) or when the output cannot be written.
 */
int main(void)
{
  char w[MAXLENGTH];        /* Previous word: w_len valid bytes, no NUL. */
  int w_len = 0;
  long bytes_read = 0;      /* long: word lists may exceed INT_MAX bytes. */
  long bytes_written = 0;
  long words = 0;
  int c;

  /* Loop on words: */
  while ((c = getchar()) != EOF)
    {
      int n, i;

      ++bytes_read;
      n = decode_prefix_length(c);
      if (n < 0)
        {
          fprintf(stderr, "** bad prefix char at byte %ld\n", bytes_read);
          return 1;
        }

      /* The prefix can only reuse bytes the previous word actually had. */
      if (n > w_len)
        {
          fprintf(stderr, "** bad prefix length at byte %ld\n", bytes_read);
          return 1;
        }

      /* Copy prefix from previous word: */
      for (i = 0; i < n; i++)
        putchar(w[i]);
      bytes_written += n;

      /* Copy and save rest of word: */
      while ((c = getchar()) != '\n')
        {
          /* Without this test a truncated final line would be read as an
             endless run of EOF "characters". */
          if (c == EOF)
            {
              fprintf(stderr, "** unexpected end of input at byte %ld\n",
                      bytes_read);
              return 1;
            }
          ++bytes_read;

          /* Check the bound before emitting, so no byte of an oversized
             word is written past the limit. */
          if (n >= MAXLENGTH)
            {
              fprintf(stderr, "** word too long at byte %ld\n", bytes_read);
              return 1;
            }
          putchar(c);
          ++bytes_written;
          w[n] = (char) c;
          ++n;
        }
      w_len = n;

      /* Copy newline: */
      ++bytes_read;
      putchar('\n');
      ++bytes_written;
      ++words;
    }

  /* Finalize: */
  fprintf(stderr, "%8ld words\n", words);
  fprintf(stderr, "%8ld bytes read\n", bytes_read);
  fprintf(stderr, "%8ld bytes written\n", bytes_written);

  /* fclose flushes buffered output; a failure here means the expanded
     list is incomplete (e.g. disk full or closed pipe). */
  if (fclose(stdout) != 0)
    {
      fprintf(stderr, "** error writing output\n");
      return 1;
    }
  return 0;
}

No comments:

Post a Comment