Word tree

cleanword.c

#include "wordtree.h"

// Delete non-letters from "words"
void cleanword(char *word)
{
    while (*word) {     // point to each character in turn...

        // Accept letters, digits and apostrophes, and dashes:
        if (isalpha(*word) || isdigit(*word) || *word == '\'' || *word == '-') {
                *word = tolower(*word); // lowercase everything
                word++;  // advance "word" ptr to next character
        }
        else {    // overwrite this character with the rest of the word:
            char *p = word;
            do {
                *p = *(p+1);  // copy next character on top of this one
            } while (*(p++));   // test-before-increment gets '\0' copied too.
        }
    }
}

newnode.c

#include "wordtree.h"

// Create and return a new node containing the new word
wordnode *newnode(char *word)
{
    wordnode *node = malloc(sizeof(wordnode));
    node->word = malloc(strlen(word)+1);
    strncpy(node->word, word, strlen(word));
    node->count = 1;
    node->left = node->right = NULL;
    return node;
}

wordtree.h

#ifndef _WORDTREE_
#define _WORDTREE_

#include <stdio.h>
#include <string.h> // strcmp()
#include <ctype.h>  // isalpha(), etc.
#include <stdlib.h> // malloc(), free()
#include <time.h>   // clock()

typedef struct WordNode {
    char *word;
    unsigned count;
    struct WordNode *left, *right;
} wordnode;

void cleanword(char *word);
wordnode *newnode(char *word);
void addtotree(wordnode **root, char *word, unsigned depth);
unsigned displaytree(wordnode *root);

extern unsigned maxdepth;  // keep track of how deeply the recursions go

#endif

displaytree.c

#include "wordtree.h"

// Recursively display each node of the tree
unsigned displaytree(wordnode *root)
{
    if (root == NULL)
        return 0;

    unsigned length = 0;
    length += displaytree(root->left);              // recurse left

    printf("%6u  %s\n", root->count, root->word);   // print this word
    length++;

    length += displaytree(root->right);             // recurse right

    return length;
}

addtotree.c

#include "wordtree.h"

// Recursively find the last list node that comes before (or is equal to)
// the new word.  Count the new word or insert after next, as appropriate.
void addtotree(wordnode **root, char *word, unsigned depth)
{
    if (*root == NULL) {
        *root = newnode(word);
        if (depth > maxdepth)
            maxdepth = depth;

    } else {
        int match = strcmp(word, (*root)->word);
        if (match == 0) {
            (*root)->count++;

        } else if (match < 0) {
            addtotree(&((*root)->left), word, depth+1);

        } else if (match > 0) {
            addtotree(&((*root)->right), word, depth+1);

        } else {
            // we should never get here...
            fprintf(stderr, "strcmp() failed!\n%u depth\n%s  %s\n",
                depth, word, (*root)->word);
        }
    }
}

displaytree-main.c

/*
* Read words from a file and elide any non-letters; collect into a binary tree.
* 2019-10-21  Don't depend on "%ms" - it's not on Macs, Windows
*/
#include "wordtree.h"   // includes system header files

#define BUFSIZE 1024
#define FMTSTR  "%1024s"

unsigned maxdepth = 0;  // keep track of how deeply the recursions go

int main(int argc, char **argv)
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return 1;
    }

    long unsigned clockstart = clock();  // begin timing the program

    char wordbuffer[BUFSIZE];   // hold each word as it's read in
    unsigned totalwords;

    FILE *fp = fopen(argv[argc-1], "r");

    wordnode *root = NULL;
    /*
    * Read each word and add it to the tree.
    */
    while (EOF != fscanf(fp, FMTSTR, wordbuffer)) {
        cleanword(wordbuffer);
        if (strlen(wordbuffer) > 0) {   // Is there any "word" left?
            addtotree(&root, wordbuffer, 1);  // initial recursive call...
            totalwords++;
        }
    }
    fclose(fp);
    printf("%u total words.\n", totalwords);
    printf("%u maximum recursion depth.\n", maxdepth);

    // How long did it take to build the tree?
    long unsigned clockprint = clock();
    fprintf(stderr, "Read words/build tree: %.6lf seconds\n",
        (double)(clockprint-clockstart)/(double)CLOCKS_PER_SEC);

    printf("%u distinct words.\n", displaytree(root));

    // How long did it take to display the tree?
    long unsigned clockend = clock();
    fprintf(stderr, "display tree: %.6lf seconds\n",
        (double)(clockend-clockprint)/(double)CLOCKS_PER_SEC);

    return 0;
}