| ||
| 2001-03-27 | © 2001-2003 Harry M. Hardjono ramstrong@earthlink.net | |
|
Home Page |
// **************************************************
// * ANAGRAM
// * Copyright 1998 Harry M. Hardjono
// * 10 Nov 1998: version 1.1
// * - Eliminate extraneous print statements
// * - More documentation and clean up
// *
// * 06 Nov 1998: version 1.0
// * - first release version
// * - uses C++ // comment convention
// * - Extensive command line options
// * - uses table for speedups
// *
// * To do:
// * - program clean up
// * - Dedicated Error and Warning functions
// *
// ***************************************************
// *******************************************************
// *** *** *** *** *** *** *** *** *** *** ***
// * * * * * * * * * * *
// ====================================================
// I N C L U D E S
// ====================================================
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <time.h>
// ====================================================
// E N U M S
// ====================================================
enum nextpos_type {
START, CURRENT, NEXT
};
enum sort_type {
NONE, ALPHA, LENGTH
};
enum state_type {
SETUP, PARSING, LOADDICT, DOANAGRAM, DONE
};
// ====================================================
// D E F I N E S
// ====================================================
#define MAXSTR 255
#define MAXREC 255
#define DICTSIZE (dict.blockcount * dict.blocksize)
// ====================================================
// G L O B A L S
// ====================================================
// ----------------------------------------------------
// Structures
// ----------------------------------------------------
struct ANAOPTS {
int maxword;
int minword; // Number of words per anagram
int maxletter;
int minletter; // Number of letters per word
enum nextpos_type nextwordpos; // Next word position
long timespent; // Time in seconds
short int dispcount; // Display count of anagram found
int anagramcount; // Max anagram found
short int wordlist; // Display wordlist on/off
short int anagram; // Display anagram on/off
};
struct D_ENTRY_STRUCT {
char *w_as_is; // Entry as is w/ edge spaces deleted
char *w_a; // Entry as alpha only
int w_a_len; // Length of alpha only
char *w_a_k; // Entry as distinct alpha only
};
struct DICTSTRUCT {
struct D_ENTRY_STRUCT *entry; // Points to each dict entry
int numofentry; // Entry count
short int filter; // Filter on/off
int blockcount; // Num of blocks
int blocksize; // Size of blocks
enum sort_type stype; // Type of sort
short int sdir; // Ascent (1) or Descent (-1)
};
struct PROGVARS {
char outfilename[MAXSTR];
char dictfilename[MAXSTR];
char inputtext[MAXSTR];
int ilength;
double anapersec; // anagram found per second
};
struct ANAOPTS option; // Options
struct DICTSTRUCT dict; // Dictionary
struct PROGVARS prog; // Program variables
// ----------------------------------------------------
// Look up tables
// ----------------------------------------------------
unsigned long wtable[256];
unsigned long itable[256];
// ====================================================
// P R O T O T Y P E S
// ====================================================
enum state_type SetUp(void);
enum state_type GetVars(char args[]);
static void SetTable (unsigned long table[], char textdata[]);
void DispInfo (void);
static char *FilterSpace(char *s);
static char *FilterText(char *s);
static char *FilterDup(char *s);
int LoadDict(void);
int CompareFunc(struct D_ENTRY_STRUCT *itema, struct D_ENTRY_STRUCT *itemb);
static int FoundInDict (char *wordstr);
void DoAnagram(void);
static int ExtraChar (char textdata[]);
static int SameChar (char textdata[]);
static void AppendWord (int *analen, char anaword[], int rc, long de[]);
static void RemoveWord (int *analen, char anaword[], int rc, long de[]);
void CleanUp(void);
enum state_type ShowHelp(void);
enum state_type ShowVersion(void);
// *****************************************************
// *** *** *** *** *** *** *** *** ***
// *** *** *** *** *** *** *** *** ***
// ====================================================
// M A I N
//
// Function called:
// - ShowHelp()
// - SetUp()
// - GetVars()
// - SetTable()
// - LoadDict()
// - DoAnagram()
// - CleanUp()
//
// Global variables used:
// - prog, dict
// - itable
//
//
// ====================================================
int main(int argc, char *argv[]) {
enum state_type state = SETUP;
unsigned int processinput; // Num of argv processed
while (state != DONE) {
if (state == SETUP) {
// puts("START");
processinput=0;
if (argc < 2) state = ShowHelp(); // return DONE
else state = SetUp(); // return PARSING
} else if (state == PARSING) {
// puts("PARSING");
processinput++;
if (processinput >= argc) state = DONE;
else {
// puts(argv[processinput]);
if (argv[processinput][0] == '-') {
state = GetVars(&argv[processinput][1]);
} else {
strcpy(prog.inputtext,FilterText(argv[processinput]));
prog.ilength = strlen(prog.inputtext);
SetTable(itable,prog.inputtext);
state = LOADDICT;
}
}
} else if (state == LOADDICT) {
// puts("LOADDICT");
// printf("Loading: %s\n",prog.dictfilename);
if (dict.numofentry < 1) LoadDict();
state = DOANAGRAM;
} else if (state == DOANAGRAM) {
// puts("DOANAGRAM");
if (dict.numofentry > 1) DoAnagram();
CleanUp();
state = PARSING;
} else if (state == DONE) {
// puts("DONE");
} else {
puts("Unknown state error.");
state = DONE;
}
// puts("Looping...");
}
// puts("Done!");
} /* End of main */
// ====================================================
// S E T U P
//
// Global variables used:
// - prog
// - option
// - dict
// ====================================================
enum state_type SetUp(void) {
strcpy(prog.outfilename,"");
strcpy(prog.dictfilename,"wordlist.txt");
strcpy(prog.inputtext,"anagramfun");
prog.anapersec = 0.0;
option.maxword=128;
option.minword=1;
option.maxletter=128;
option.minletter=1;
option.nextwordpos=CURRENT;
option.timespent=0;
option.dispcount=1;
option.anagramcount=0;
option.wordlist = 1;
option.anagram = 1;
dict.entry=NULL;
dict.numofentry=0;
dict.filter=1;
dict.blockcount=0;
dict.blocksize=65000;
dict.stype=LENGTH;
dict.sdir=1;
return PARSING;
} // End of SetUp()
// ====================================================
// G E T V A R S
//
// Function called:
// - ShowHelp(), ShowVersion()
// - DispInfo()
//
// Global variables used:
// - prog, dict, option
//
// ====================================================
enum state_type GetVars(char args[]) {
enum state_type state;
state = PARSING;
if (args[0] == 'h') state = ShowHelp();
else if (args[0] == 'v') state = ShowVersion();
else if (args[0] == 'i') DispInfo();
else if (args[0] == 'I') DispInfo();
else if (args[0] == 'c') option.dispcount = 1;
else if (args[0] == 'C') option.dispcount = 0;
else if (args[0] == 'w') option.wordlist = 1;
else if (args[0] == 'W') option.wordlist = 0;
else if (args[0] == 'a') option.anagram = 1;
else if (args[0] == 'A') option.anagram = 0;
else if (args[0] == 'f') dict.filter = 1;
else if (args[0] == 'F') dict.filter = 0;
else if (args[0] == 'd' || args[0] == 'D') {
strcpy(prog.dictfilename,&args[1]);
state = PARSING;
}
else if (args[0] == 'o' || args[0] == 'O') {
strcpy(prog.outfilename,&args[1]);
state = PARSING;
}
else if (args[0] == 'p') {
if (args[1] == 's') option.nextwordpos = START;
else if (args[1] == 'c') option.nextwordpos = CURRENT;
else if (args[1] == 'n') option.nextwordpos = NEXT;
else puts("Option p error");
}
else if (args[0] == 's') {
if (args[1] == 'A') dict.sdir = 1;
else if (args[1] == 'D') dict.sdir = -1;
else if (args[1] == 'a') dict.stype = ALPHA;
else if (args[1] == 'l') dict.stype = LENGTH;
else if (args[1] == 'n') dict.stype = NONE;
else puts("Option s error");
}
else if (args[0] == 'm') option.anagramcount = atoi(&args[1]);
else if (args[0] == 't') option.timespent = atoi(&args[1]);
else if (args[0] == 'n') {
if (args[1] == 'w') option.minletter = atoi(&args[2]);
else if (args[1] == 'W') option.maxletter = atoi(&args[2]);
else puts("Option n error");
}
else if (args[0] == 'r') {
if (args[1] == 'd') option.minword = atoi(&args[2]);
else if (args[1] == 'D') option.maxword = atoi(&args[2]);
else puts("Option r error");
}
return state;
} // End of GetVars
// ====================================================
// S E T T A B L E
//
// Set up the look up table.
// ====================================================
static void SetTable (unsigned long table[], char textdata[]) {
char *iptr;
memset(table,0,256*sizeof(unsigned long));
for (iptr=textdata;*iptr;iptr++) table[*iptr]++;
} // End of SetTable
// ====================================================
// D I S P I N F O
//
// Global variables used:
// - prog
// - option
// - dict
// ====================================================
void DispInfo (void) {
// puts("Show Info here...");
printf ("Anagram text string: %s\n",prog.inputtext);
printf ("\n");
printf ("Words in Anagram: %d - %d\n",option.minword,option.maxword);
printf ("Letters per word: %d - %d\n",option.minletter,option.maxletter);
printf ("Next word position: ");
switch (option.nextwordpos) {
case CURRENT: puts("Same word"); break;
case START: puts("Start of Dict."); break;
case NEXT: puts("Next word"); break;
default: puts("Unspecified"); break;
}
printf ("Time limit (secs): %d\n",option.timespent);
printf ("Show counter: %s\n",(option.dispcount)?"Yes":"No");
printf ("Show word list: %s\n",(option.wordlist)?"Yes":"No");
printf ("Show anagrams: %s\n",(option.anagram)?"Yes":"No");
printf ("Dictionary name: %s\n",prog.dictfilename);
printf ("Dictionary filter: %s\n",(dict.filter)?"On":"Off");
printf ("Dictionary sort: ");
switch (dict.stype) {
case ALPHA: puts("Alphabetical"); break;
case LENGTH: puts("Length"); break;
default: puts("No sort"); break;
}
printf (" Sort direction: %s\n",(dict.sdir == 1)?"Ascend":"Descend");
} // End of DispInfo()
// ====================================================
// F I L T E R S P A C E
//
// Filter out spaces.
// ====================================================
static char *FilterSpace(char *s) {
int i,j,slen;
slen = strlen(s);
for (i=0,j=0;s[j] = s[i];i++,j++) if (isspace(s[i])) j--;
return s;
} // End of FilterSpace
// ====================================================
// F I L T E R T E X T
//
// Filter out non-alpha and turn lowercase into uppercase.
// Modify input string into A-Z.
// ====================================================
static char *FilterText(char *s) {
int i,j,slen;
slen = strlen(s);
for (i=0,j=0;i<slen;i++)
if (isalpha(s[i])) s[j++] = toupper(s[i]);
s[j]='\0';
return s;
} // End of FilterText
// ====================================================
// F I L T E R D U P
//
// Filter out duplicates.
// ====================================================
static char *FilterDup(char *s) {
int i,j,slen;
slen = strlen(s);
for (i=0,j=0;i<slen;i++)
if (strchr(s,s[i]) >= &s[i]) s[j++] = s[i];
s[j]='\0';
return s;
} // End of FilterDup
// ====================================================
// L O A D D I C T
//
// Functions used:
// - FilterSpace()
// - FilterText()
// - SetTable()
// - ExtraChar()
// - FoundInDict()
// - FilterDup()
// - CompareFunc() (for qsort)
//
// Global variables:
// - prog, dict, option
//
// ====================================================
int LoadDict(void) {
int i;
FILE *infile;
FILE *outfile;
struct D_ENTRY_STRUCT *tptr; // Points to each dict entry
char wordstr[MAXSTR];
char origstr[MAXSTR];
int wordlen;
// --------------------------------------------------
// Open files
// --------------------------------------------------
if ((infile = fopen(prog.dictfilename,"r")) == NULL) {
printf("Error opening %s\n", prog.dictfilename);
return 1;
}
if (prog.outfilename[0]) {
if ((outfile = fopen(prog.outfilename,"w")) == NULL) {
printf("Error opening %s\n", prog.outfilename);
prog.outfilename[0] = '\0';
}
}
// --------------------------------------------------
// Load Dict
// --------------------------------------------------
while (fgets(wordstr, MAXSTR, infile) != NULL) {
// ------------------------------------------------
// Allocating Dictionary
// ------------------------------------------------
if ((dict.numofentry)+1 >= DICTSIZE) {
dict.blockcount++;
if ((tptr = (struct D_ENTRY_STRUCT *)
realloc(dict.entry,
sizeof(struct D_ENTRY_STRUCT) * DICTSIZE))
== (struct D_ENTRY_STRUCT *) NULL) {
printf("Dictionary full!\n");
dict.blockcount--;
goto DONEDICT;
}
dict.entry = tptr;
// printf("Resizing: %d\n",DICTSIZE);
}
// ------------------------------------------------
// Filter Text
// ------------------------------------------------
// Filter Space
FilterSpace(wordstr);
strcpy (origstr,wordstr);
FilterText(wordstr);
wordlen = strlen(wordstr);
// Filter Length
if ( wordlen > prog.ilength
|| wordlen > option.maxletter
|| wordlen < option.minletter) continue;
// Filter extraneous characters
SetTable(wtable,wordstr);
if (ExtraChar(wordstr)) continue;
// Filter words already in dictionary
if (dict.filter > 0) {
if (FoundInDict(wordstr)) continue;
}
// ------------------------------------------------
// Add word to dictionary
// ------------------------------------------------
if ((dict.entry[dict.numofentry].w_as_is = (char *)
malloc(sizeof(char) *
(strlen(origstr)+1))
) == (char *)NULL) {
printf ("Out of Memory Error in loading the dictionary.\n");
goto DONEDICT;
}
strcpy(dict.entry[dict.numofentry].w_as_is,origstr);
if ((dict.entry[dict.numofentry].w_a = (char *)
malloc(sizeof(char) *
(wordlen+1))
) == (char *)NULL) {
printf ("Out of Memory Error in loading the dictionary.\n");
free (dict.entry[dict.numofentry].w_as_is);
goto DONEDICT;
}
strcpy(dict.entry[dict.numofentry].w_a,wordstr);
dict.entry[dict.numofentry].w_a_len = strlen(wordstr);
FilterDup(wordstr);
if ((dict.entry[dict.numofentry].w_a_k = (char *)
malloc(sizeof(char) *
(strlen(wordstr)+1))
) == (char *)NULL) {
printf ("Out of Memory Error in loading the dictionary.\n");
free (dict.entry[dict.numofentry].w_as_is);
free (dict.entry[dict.numofentry].w_a);
goto DONEDICT;
}
strcpy(dict.entry[dict.numofentry].w_a_k,wordstr);
dict.numofentry++;
}
DONEDICT:
if (dict.stype != NONE) {
qsort(dict.entry,dict.numofentry,sizeof(struct D_ENTRY_STRUCT),
CompareFunc);
}
// --------------------------------------------------
// Echo word to screen
// --------------------------------------------------
if (option.wordlist) {
for (i=0;i<dict.numofentry;i++) {
if (option.dispcount) printf("%d ",i);
printf("%s\n",dict.entry[i].w_a);
if (prog.outfilename[0]) {
if (option.dispcount) fprintf(outfile, "%d ",i);
fprintf(outfile, "%s\n",dict.entry[i].w_a);
}
}
printf("Count: %ld\n", dict.numofentry);
}
// --------------------------------------------------
// Close file
// --------------------------------------------------
fclose (infile);
if (prog.outfilename[0]) {
fclose (outfile);
}
return 0;
} // End of LoadDict
// ====================================================
// C O M P A R E F U N C
//
// Sort by length and alpha
//
// Notes:
// This is used by qsort. I wonder if structs are okay.
// It gives a warning?
// ====================================================
int CompareFunc(struct D_ENTRY_STRUCT *itema,
struct D_ENTRY_STRUCT *itemb) {
int i;
i=0;
if (dict.stype == LENGTH && !i) {
i = itemb->w_a_len - itema->w_a_len;
}
if ((dict.stype == ALPHA || dict.stype == LENGTH) && !i) {
i = strcmp(itema->w_as_is,itemb->w_as_is);
}
i *= dict.sdir;
return i;
} // End of CompareFunc
// ====================================================
// F O U N D I N D I C T
//
// Notes:
// I don't use hash for this one.
// It's less than a second anyway.
// ====================================================
static int FoundInDict (char *wordstr) {
int found;
int i;
for (found=0,i=0;i<dict.numofentry;i++) {
if (!strcmp(wordstr,dict.entry[i].w_a)) {
found = 1;
break;
}
}
return found;
} // End of FoundInDict
// ====================================================
// D O A N A G R A M
//
// This is basically recursive algorithm.
// I implement it iteratively just so there's
// little overhead. There are a couple places that
// I can improve, but I decided to keep this extremely
// simple. Look up tables are used for speed.
//
// Functions called:
// - FilterDup()
// - AppendWord()
// - RemoveWord()
// - ExtraChar()
// - SameChar()
//
// Global variables:
// - option, prog, dict
// ====================================================
void DoAnagram(void) {
int i;
FILE *outfile;
int rc; // recursive level
long de[MAXREC]; // dictionary entry
char anaword[MAXSTR]; // anagram word
int analen; // anagram word length
unsigned long anacount; // anagram counter
clock_t startclock; // Time when starting
short int timecheck; // Check item periodically
char ikey[MAXSTR]; // Key code for input text
memset(wtable,0,sizeof(unsigned long)*256);
rc = 0;
de[rc] = 0;
anacount = 0;
anaword[0] = '\0';
analen=0;
timecheck = 0;
strcpy(ikey,prog.inputtext);
FilterDup(ikey);
startclock = clock();
// --------------------------------------------------
// Open file
// --------------------------------------------------
if (prog.outfilename[0]) {
if ((outfile = fopen(prog.outfilename,"a")) == NULL) {
printf("Error opening %s\n", prog.outfilename);
prog.outfilename[0] = '\0';
}
}
// --------------------------------------------------
// Start recursive algorithm here.
// --------------------------------------------------
for (;;) {
// ------------------------------------------------
// Check this now and then
// ------------------------------------------------
if (timecheck++ <= 0) {
if (option.timespent > 0 &&
(clock()-startclock)/CLOCKS_PER_SEC > option.timespent) {
goto ENDGAME;
}
if (dict.stype == LENGTH && dict.sdir == 1
&& dict.entry[de[0]].w_a_len * option.maxword < prog.ilength) {
goto ENDGAME;
}
if (!option.anagram && !prog.outfilename[0]) {
goto ENDGAME;
}
timecheck = 1;
}
AppendWord (&analen, anaword, rc, de);
// ------------------------------------------------
// Checking Anagram
// 1. Too long?
// 2. Too short?
// 3. Just right?
// ------------------------------------------------
if (analen > prog.ilength || rc >= option.maxword) {
RemoveWord(&analen, anaword, rc, de);
} else if (analen < prog.ilength) {
if (ExtraChar(dict.entry[de[rc]].w_a_k)) {
RemoveWord(&analen, anaword, rc, de);
} else {
rc++;
switch (option.nextwordpos) {
case START: de[rc] = 0; break;
case NEXT: de[rc] = de[rc-1]+1; break;
default: de[rc] = de[rc-1]; break;
}
}
} else {
if (rc >= option.minword - 1) {
if (SameChar(ikey)) {
if (option.anagram) {
if (option.dispcount) printf ("%lu ", anacount);
for (i=0;i<=rc;i++) printf ("%s ",dict.entry[de[i]].w_as_is);
putchar('\n');
}
if (prog.outfilename[0]) {
if (option.dispcount) fprintf (outfile, "%lu ", anacount);
for (i=0;i<=rc;i++) fprintf (outfile,"%s ",dict.entry[de[i]].w_as_is);
fputc('\n', outfile);
}
anacount++;
if (option.anagramcount > 0 && anacount >= option.anagramcount) {
goto ENDGAME;
}
}
}
RemoveWord(&analen, anaword, rc, de);
}
// ------------------------------------------------
// 1. Recurse
// 2. Skip over obvious words
// ------------------------------------------------
while (de[rc]>=dict.numofentry) {
rc--;
if (rc<0 || rc>MAXREC) goto ENDGAME;
RemoveWord(&analen, anaword, rc, de);
// The following code is _very_ useful if dict is sorted by length
while (de[rc] < dict.numofentry
&& analen+dict.entry[de[rc]].w_a_len > prog.ilength ) {
de[rc]++;
}
}
}
ENDGAME:
printf ("Total anagram: %lu\n", anacount);
prog.anapersec = anacount / ((double)(clock()-startclock)/CLOCKS_PER_SEC);
printf ("Anagram per second: %f\n", prog.anapersec);
// --------------------------------------------------
// Close file
// --------------------------------------------------
if (prog.outfilename[0]) {
fclose(outfile);
}
} // End of DoAnagram
// ====================================================
// E X T R A C H A R
//
// Return true if wtable has extraneous character
//
// Global variables:
// - wtable[], itable[]
// ====================================================
static int ExtraChar (char textdata[]) {
int extrachar;
char *iptr;
for (extrachar=0,iptr=textdata; *iptr;iptr++) {
if (wtable[*iptr] > itable[*iptr]) {
extrachar = 1;
break;
}
}
return extrachar;
} // End of ExtraChar
// ====================================================
// S A M E C H A R
//
// Return true if wtable matches itable
//
// Global variables:
// - wtable[], itable[]
// ====================================================
static int SameChar (char textdata[]) {
int samechar;
char *iptr;
for (samechar=1,iptr=textdata;*iptr;iptr++) {
if (wtable[*iptr] != itable[*iptr]) {
samechar = 0;
break;
}
}
return samechar;
} // End of SameChar
// ====================================================
// A P P E N D W O R D
//
// Append dictionary entry at the end of anagram string
//
// Global variables:
// - dict
// - wtable
// ====================================================
static void AppendWord (int *analen, char anaword[], int rc, long de[]) {
char *iptr;
for (iptr=dict.entry[de[rc]].w_a; *iptr; iptr++) {
++wtable[*iptr];
}
strcat(anaword,dict.entry[de[rc]].w_a);
*analen += dict.entry[de[rc]].w_a_len;
} // End of AppendWord
// ====================================================
// R E M O V E W O R D
//
// Remove dictionary entry at the end of anagram string
//
// Global variables:
// - dict
// - wtable
// ====================================================
static void RemoveWord (int *analen, char anaword[], int rc, long de[]) {
char *iptr;
for (iptr=dict.entry[de[rc]].w_a; *iptr; iptr++) {
--wtable[*iptr];
}
*analen -= dict.entry[de[rc]].w_a_len;
anaword[*analen]='\0';
de[rc]++;
} // End of RemoveWord
// ====================================================
// C L E A N U P
//
// Free allocated memory
// ====================================================
void CleanUp(void) {
long int i;
if (dict.numofentry>0) {
for (i=0;i<dict.numofentry; i++) {
free (dict.entry[i].w_as_is);
free (dict.entry[i].w_a);
}
free ((void *) dict.entry);
}
dict.entry=NULL;
dict.numofentry=0;
dict.blockcount=0;
} // End of CleanUp
// ====================================================
// S H O W H E L P
//
// Display usage and exit
// ====================================================
enum state_type ShowHelp(void) {
puts("Usage: anagram [OPTION]... string...\n");
puts("Makes anagrams out of each string.");
puts("-dX, -DX X is name of dictionary.");
puts("-oX, -OX X is filename for output.");
puts("-w, -W Display wordlist ON(w), OFF(W)");
puts("-a, -A Display anagram ON(a), OFF(A)");
puts("-i, -I Display info.");
puts("-c, -C Display counter: ON(c), OFF(C)");
puts("-f, -F filter: ON(f), OFF(F)");
puts("-s[A|D] sort: ascending(A) descending(D)");
puts("-s[a|l|n] sort: alpha(a), length(l), none(n)");
puts("-n[w|W]N # of letter per word: min(w), max(W), N is the number");
puts("-r[d|D]N Recursion depth: min(d), max(D), N is the number");
puts("-mN Max anagram count. N is the number");
puts("-tN Max time in seconds. N is the number");
puts("-p[s|c|n] Next word at: start(s), current(c), next(n)");
puts("-h, display this help and exit.");
puts("-v, display version and exit.");
return DONE;
} // End of ShowHelp
// ====================================================
// S H O W V E R S I O N
//
// Display version and exit
// ====================================================
enum state_type ShowVersion(void) {
puts("Anagram 1.1");
puts("Copyright 1998 Harry M. Hardjono");
puts("For bug reports: >/dev/NULL");
return DONE;
} // End of ShowVersion
|