/* Cryptoquote solution without trying all 403291461126605635584000000
   possibilities (with any luck and a good /usr/dict/words)

   Copyright (C) 1997 Nathan I. Laredo

   This program is modifiable/redistributable under the terms
   of the GNU General Public Licence.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   Send your comments and all your spare pocket change to
   the address found in "whois tinycode.com"

   use in web pages:
   on stdin:
   "j=blahblahbalh"  solve jumble
   "anythingelse=blahblah blah" solve cryptoquote

   cryptoquote is a registered trademark of someone.
 */
/* Location of an ascii list of words separated by newline */
#define DICTIONARY_FILE "fatdict"
/* Maximum number of word matches per pass */
#define MAX_MATCHES 30000
#define MAXWORDLENGTH 64
#define NO_STATUS

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>


char *xlate[MAX_MATCHES], *xlate2[MAX_MATCHES], **windex, **in_word;
/* initial translate matrix supplied by user */
char xlc[32] __attribute__ ((aligned(32)));
/* hold starts of various length words in the dictionary */
int windexlen[MAXWORDLENGTH];
int windexlen_end[MAXWORDLENGTH];
int in, dict_size = 0;

void do_exit(i)
int i;
{
    printf("</CENTER>%d words in dict</BODY></HTML>\n", dict_size);
    exit(i);
}
int count_words(source, nchars)
char *source;
int nchars;
{
    int i, count, state;

    for (i = state = count = 0; i < nchars; i++)
        if (isalpha(source[i]))
            state = 1;
        else if (state)
            (state = 0, count++);

    return count;
}

int make_wordlist(index, source, nchars)
char **index, *source;
int nchars;
{
    int i, count, state;

    for (i = state = count = 0; i < nchars; i++)
        if (isalpha(source[i])) {
            source[i] = toupper(source[i]);
            if (!state)
                (state = 1, index[count++] = &source[i]);
        } else if (state)
            (source[i] = '\0', state = 0);
    return count;
}

int read_dict()
{
    FILE *f;
    long fsize, count;
    char *words;

    if ((f = fopen(DICTIONARY_FILE, "r")) == NULL) {
        perror(DICTIONARY_FILE);
        exit(1);
    }
    if (fseek(f, 0L, SEEK_END)) {
        perror(DICTIONARY_FILE);
        exit(1);
    }
    fsize = ftell(f);
    if (fseek(f, 0L, SEEK_SET)) {
        perror(DICTIONARY_FILE);
        exit(1);
    }
    /* save room to null terminate last value if no newline */
    if ((words = malloc(fsize + 1)) == NULL) {
        fprintf(stderr, "malloc failed for %ld bytes\n", fsize);
        do_exit(1);
    }
    if ((count = fread(words, 1, fsize, f)) < fsize) {
        fprintf(stderr, "fread error: %ld of %ld read\n", fsize, count);
        do_exit(1);
    }
    if (fclose(f))
        perror(DICTIONARY_FILE);

    count = count_words(words, fsize);

    if ((windex = malloc(count * sizeof(char *))) == NULL) {
        fprintf(stderr, "malloc failed for %ld bytes\n",
                count * sizeof(char *));
        do_exit(1);
    }
    return make_wordlist(windex, words, fsize);
}

/* for qsort: sort longest to shortest strings */
int strlencmp(a, b)
char **a, **b;
{
    return strlen(*b) - strlen(*a);
}

int strordcmp(a, b)
char **a, **b;
{
        return strcmp(*a, *b);
}

int findchar(s, c)
char *s;
int c;
{
    int i;
    for (i = 0; i < 26; i++)
        if (s[i] == c)
            return 1;
    return 0;
}

#ifndef NO_STATUS
static unsigned long int num_compares = 0;
static int mst = 0, wst = 0, mnst = 0;
#endif

/* return zero if pattern of characters matches */
int pcompare(crypted, plain, xl, l)
char *crypted, *plain, *xl;
int l;
{
    register int i;
#ifndef NO_STATUS
    num_compares++;
    /* print status every 65536 compares */
    if (!(num_compares & 0xFFFF)) {
        i = (num_compares & 0x30000) >> 16;
        printf("[%03d/%06d/%06d]%c\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b",
               wst, mnst, mst,
               i == 0 ? '|' : i == 1 ? '/' : i == 2 ? '-' : '\\');
        fflush(stdout);
    }
#endif

    /* string length already compared */
    for (i = 0; i < l; i++) {
        if (xl[crypted[i] - 'A'] == '_' && !findchar(xl, plain[i]))
            xl[crypted[i] - 'A'] = plain[i];
        if (xl[crypted[i] - 'A'] != plain[i])
            return 1;
    }
    return 0;
}

/* try to narrow down match list to a smaller list
 * compares all matches with specified word for new list of matches
 * runs through mB matches in xlateB, creating new list xlateA with
 * mA matches.   Return number of new matches found.
 * For first pass, xlateB = NULL and supplied translation (if any)
 * is used.
 */
int narrow_matches(xlateA, xlateB, mB, word)
char **xlateA, **xlateB;
int mB, word;
{
    int i, k, mA, start, len, end;

    len = strlen(in_word[word]);
    if ((start = windexlen[len]) < 0)
        return 0;
    end = windexlen_end[len];
#ifndef NO_STATUS
    mnst = 0;
    wst = word;
#endif
    for (k = mA = 0; k < mB && mA < MAX_MATCHES; k++) {
#ifndef NO_STATUS
        mst = mB - k;
#endif
        for (i = start; i < end && mA < MAX_MATCHES; i++) {
            if (xlateB != NULL)
                memcpy(xlateA[mA], xlateB[k], 26);
            else
                memcpy(xlateA[mA], xlc, 26);
            if (!pcompare(in_word[word], windex[i], xlateA[mA], len))
#ifndef NO_STATUS
                mnst =
#endif
                    mA++;
        }
    }
    return mA;

}
void frequencycount(s, out)
char *s, *out;
{
    int i;

    memcpy(out, "@@@@@@@@@@@@@@@@@@@@@@@@@@", 26);
    for (i = 0; i < strlen(s); i++)
        if (isalpha(s[i]))
            out[toupper(s[i]) - 'A']++;
}

int  freqcmp(in, out)
char *in, *out;
{
    int i;
    for  (i = 0; i < 26; i++)
        if (out[i] > in[i])
           return 1;
    return 0;
}

int main(argc, argv)
int argc;
char **argv;
{
    char instring[32767], oldstr[32767], *tmp, dowhat;
    int i, j, k, bytesread, count, use, start, end, len;
    int m1 = 0, m2 = 0, roothit = -1;

    memset(xlc, '_', 26);
    xlc[26] = '\0';
    for (j = 0, i = 1; i < argc && j < 32250; i++) {
        if (strlen(argv[i]) == 3 && argv[i][1] == '=' && isalpha(*argv[i]))
            xlc[toupper(*argv[i]) - 'A'] = toupper(argv[i][2]);
        snprintf(&instring[j], 510, "%s ", argv[i]);
        j = strlen(instring);
    }
    instring[j] = '\0';

    printf("Content-type: text/html\n\n"
           "<HTML>\n<HEAD><TITLE>Puzzle Solver Thinger... </TITLE></HEAD>\n"
           "<BODY BGCOLOR=\"#000000\" TEXT=\"#F5DEB3\">"
           "<H1>Puzzle Solver Thinger...</H1>\n"
           "<FORM METHOD=\"GET\" ACTION=\"/cgi-bin/decrypto.cgi\"><BR>\n"
           "<INPUT TYPE=\"TEXT\" SIZE=\"32\" NAME=\"j\">&nbsp;"
           "Jumble solver thinger</FORM>\n"
           "<FORM METHOD=\"GET\" ACTION=\"/cgi-bin/decrypto.cgi\"><BR>\n"
           "<INPUT TYPE=\"TEXT\" SIZE=\"32\" NAME=\"q\">&nbsp;"
           "Cryptoquote solver</FORM><CENTER>\n");

    if (strlen(instring) < 5)
        do_exit(0);

    /* An ugly method to allocate a string, but simplifies other stuff */

    if ((tmp = malloc(32 * MAX_MATCHES * 2)) == NULL) {
        fprintf(stderr, "malloc failed for %d bytes\n", 32 * MAX_MATCHES * 2);
        do_exit(1);
    }
    for (i = 0; i < MAX_MATCHES; i++) {
        xlate[i] = tmp;
        tmp += 32;
    }
    for (i = 0; i < MAX_MATCHES; i++) {
        xlate2[i] = tmp;
        tmp += 32;
    }
    /* setup initial translation and modify by command line args if any */
    /* almost every clue provided will exponentially decrease runtime */
    dict_size = read_dict();
    qsort(windex, dict_size, sizeof(char *), strordcmp);
    /* sort + uniq the output list */
    for (j = 1, i = 1; i < dict_size; i++) {
        if (strcmp(windex[i], windex[i - 1]))
                windex[j++] = windex[i];
    }
    dict_size = j;
    /* sort uniq dict by word length */
    qsort(windex, dict_size, sizeof(char *), strlencmp);

    /* index dictionary by word length */
    for (i = 0; i < MAXWORDLENGTH; i++)
        windexlen[i] = windexlen_end[i] = -1;

    for (i = j = 0; i < dict_size; i++) {
        k = strlen(windex[i]);
        if (j != k) {
            if (j)              /* this is buggy */
                windexlen_end[j] = i;
            windexlen[j = k] = i;
        }
    }
    dowhat = instring[0];
    tmp = strchr(instring,'=');
    if (tmp)
        strcpy(instring, (tmp + 1));
    bytesread = strlen(instring);

    strcpy(oldstr, instring);
    count = count_words(instring, bytesread);
    if ((in_word = malloc(count * sizeof(char *))) == NULL) {
        fprintf(stderr, "malloc failed for %d bytes\n",
                count * sizeof(char *));
        do_exit(1);
    }
    in = make_wordlist(in_word, instring, bytesread);
    if (dowhat != 'j')
        printf("Cryptoquote Solver<p>%d words read, processing...<br>\n", in);
    else
        printf("Jumble/Word Game Solver<p>\n");
    fflush(stdout);
    if (dowhat == 'j') {
        printf("<p>Solving for jumble/word game/scrabble puzzle: %s<p>\n", in_word[0]);
        len = strlen(in_word[0]);
        start = windexlen[len];
        end = windexlen_end[3];
        frequencycount(in_word[0], xlate[0]);
        if (start < 0)
            do_exit(1);
        for (i = start, m1 = 0; i < end && m1 < MAX_MATCHES; i++) {
            frequencycount(windex[i], xlate[1]);
            if (!freqcmp(xlate[0], xlate[1]))
                printf("%s\n", windex[i]);
        }
        printf("<p>\n\n");
        do_exit(0);
    }

    /*
     * Here's the algorithm:
     * First, find longest possible match of input words.
     * Take those matches, and apply to all remaining words from
     * longest word (roothit) to shortest word (length > 1)
     * and see what fits list of matches from previous compare.
     */

    /* re-order word list from longest to shortest using qsort */
    qsort(in_word, in, sizeof(char *), strlencmp);

    /* Stage 1:  find matches for longest possible word */
    use = 1; /* use = 1 for xlate, 2 for xlate2 (for output at end) */
    for (m1 = j = 0; roothit < 0 && j < in; j++)
        if((m1 = narrow_matches(xlate, NULL, 1, j))>0)
            roothit = j;

    if (!m1) {
        printf("... Match Failed.  dict_size=%d words.<p>\n", dict_size);
        do_exit(1);
    }

    /* find index of last word with more than one character */
    for (j = in - 1; j > roothit && strlen(in_word[j]) < 2; j--);

    /* Stage 2:  attack all words from shortest to longest */
    if (in > 1) {
        for (i = roothit + 1; i <= j; i++) {
            if (use == 1) {
                m2 = narrow_matches(xlate2, xlate, m1, i);
                /* ignore words with too many or no matches */
                if (m2)
                    use = 2;
            } else {
                m1 = narrow_matches(xlate, xlate2, m2, i);
                /* ignore words with too many or no matches */
                if (m1)
                    use = 1;
            }
        }

    }
    if (use == 2)
        count = m2;
    else
        count = m1;

    if (count == 0) {           /* this should NEVER happen */
        printf("<p>... Sanity check failed!\n"
               "email cryptoquote to laredo@gnu.ai.mit.edu<p>\n");
        do_exit(0);
    }
    printf("... Found %d possible solution%s<p>\n", count,
           count < 2 ? ":\n" : "s:\n");

    for (j = 0; j < count; j++) {
        if (use == 1)
            tmp = xlate[j];
        else
            tmp = xlate2[j];
        for (i = 0; k = oldstr[i]; i++) {
            if (isalpha(k)) {
                if (isupper(k))
                    putchar(tmp[k - 'A']);
                else
                    putchar(tolower(tmp[k - 'a']));
            } else putchar(' ');
        }
        printf("<br>\n");
    }
    do_exit(0);
}