wordlist, done in multiple files

"wordlist.h" — prototypes

#ifndef _WORDLIST_
#define _WORDLIST_

#include <stdio.h>
#include <string.h> // strcmp()
#include <ctype.h>  // isalpha(), etc.
#include <stdlib.h> // malloc(), free()
#include <time.h>   // clock()

typedef struct WordNode {
    char *word;
    unsigned count;
    struct WordNode *next;
} wordnode;

void cleanword(char *word);
wordnode *newnode(char *word, wordnode *next);
void append_after(wordnode *prev, char *word, unsigned depth);
void displaylist(wordnode *head);

extern unsigned maxdepth;  // keep track of how deeply the recursions go

#endif

"wordlist.c" — main()

/*
* Read words from a file and elide any non-letters; collect into a (sorted) linked list.
* 2019-10-18, w/ more comments  -bob,mon.
*/
#include "wordlist.h"   // includes system header files

unsigned maxdepth = 0;  // keep track of how deeply the recursions go

int main(int argc, char **argv)
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return 1;
    }

    unsigned clockstart = clock();  // begin timing the program

    char *word;     // points to each word as it is read in
    unsigned totalwords = 0;
    maxdepth = 0;   // global variable

    wordnode *head;
    head = NULL;    // list is initially empty

    FILE *fp = fopen(argv[argc-1], "r");
    // "%ms" allocates "just enough" space for the string
    while (EOF != fscanf(fp, " %ms", &word)) {
        cleanword(word);
        if (strlen(word) == 0)  // Is there any "word" left?
            free(word); // free up the unused word's storage space
        else {
            totalwords++;

            if (NULL == head) {
                // this is the first word added to the list
                //      wordnode *node = newnode(word, NULL);
                //      head = node;
                head = newnode(word, NULL);

            } else if ( strcmp(head->word, word) > 0 ) {
                // this word is the new head of the list
                //      wordnode *node = newnode(word, head);
                //      head = node;
                head = newnode(word, head);

            } else {
                // this word goes later in the list
                append_after(head, word, 1);    // initial recursive call...
            }
        }
    }
    fclose(fp);
    printf("%u total words.\n", totalwords);
    printf("%u maximum recursion depth.\n", maxdepth);

    // How long did it take to build the list?
    unsigned clockprint = clock();
    fprintf(stderr, "Read words/build list: %.6lf seconds\n",
        (double)(clockprint-clockstart)/(double)CLOCKS_PER_SEC);

        displaylist(head);
        // How long did it take to display the list?
        unsigned clockend = clock();
        fprintf(stderr, "display list: %.6lf seconds\n",
                (double)(clockend-clockprint)/(double)CLOCKS_PER_SEC);

    return 0;
}

"listhelper.c" — cleanword(), newnode(), displaylist()

#include "wordlist.h"

// Delete non-letters from "words"
void cleanword(char *word)
{
    while (*word) {     // point to each character in turn...

        // Accept letters, digits and apostrophes, and dashes:
        if (isalpha(*word) || isdigit(*word) || *word == '\'' || *word == '-') {
                *word = tolower(*word); // lowercase everything
                word++;  // advance "word" ptr to next character
        }
        else {    // overwrite this character with the rest of the word:
            char *p = word;
            do {
                *p = *(p+1);  // copy next character on top of this one
            } while (*(p++));   // test-before-increment gets '\0' copied too.
        }
    }
}

// Create and return a new node containing the new word
wordnode *newnode(char *word, wordnode *next)
{
    wordnode *node = malloc(sizeof(wordnode));
    node->word = word;
    node->count = 1;
    node->next = next;
    return node;
}

// Iteratively display each node of the list
void displaylist(wordnode *head)
{
    unsigned length = 0;
    wordnode *this;
    for (this = head; this != NULL; this = this->next) {
        printf("%5u  %s\n", this->count, this->word);
        length++;
    }
    printf("List length: %u\n", length);
}

Recursive version of append_after()

#include "wordlist.h"

// Recursively find the last list node that comes before (or is equal to)
// the new word.  Count the new word or insert after next, as appropriate.
void append_after(wordnode *prev, char *word, unsigned depth)
{
    if ( !strcmp(prev->word, word) ) {
        prev->count++;
        free(word);     // storage not needed anymore, so tidy up

    } else if ( prev->next == NULL || strcmp((prev->next)->word, word) > 0 ) {
        // Insert the new node between the previous node...
        // ...and whatever followed the previous node.
        wordnode *node = newnode(word, prev->next);
        prev->next = node;
        if (depth > maxdepth)
                maxdepth = depth;

    } else {
        // recursively try the rest of the list
        append_after(prev->next, word, depth+1);  // one more level of recursion
    }
}

Iterative version of append_after()

#include "wordlist.h"

// ITERATIVELY find the last list node that comes before (or is equal to)
// the new word.  Count the new word or insert after the next, as appropriate.
void append_after(wordnode *prev, char *word, unsigned depth)
{
    do {
        if (strcmp(prev->word, word) == 0) {
            prev->count++;
            free(word);
            break;      // abandon the while() loop

        } else if ( prev->next == NULL || strcmp((prev->next)->word, word) > 0) {
            // Insert the new node between the previous node...
            // ...and the node that followed the previous node.
            wordnode *node = newnode(word, prev->next);
            prev->next = node;
            if (depth > maxdepth)
                maxdepth = depth;
            break;      // abandon the while() loop

        } else {
            prev = prev->next;
            depth++;
        }
    } while (1);
}