Example: regular expression

This is the example program from the final review. It works on this dataset:

Log parser main()

#!/usr/bin/env python3
# instructor
# log parser
# 2021-05-06
import re
import matplotlib.pyplot as plt

# Matches one access-log line that records a GET request answered with a
# 4xx status.  Groups: (client IPv4 address, requested URL, status code).
#
# Hint:  build the pattern up a piece at a time, and test as you go.
# Check that the "ip-address" part works first, then add pieces.  Note that
# GET has a double-quote in front of it instead of a space.
#
# The status code is pinned to the spot right after the request's closing
# double-quote.  A looser 'HTTP.* (4\d\d)' would also accept any later
# number on the line that starts with 4 (for example a byte count of 4523
# on a 200 response) and report it as an error code.
# NOTE(review): assumes the status directly follows the quoted request, as
# in the Apache/nginx common log format -- confirm against the dataset.
_ERROR_LINE = re.compile(
    r'^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).* "GET (.*) HTTP[^"]*" (4\d\d)\b'
)


def _collect_errors(lines):
    """Group the 4xx GET requests found in *lines* by status code.

    Args:
        lines: any iterable of log-line strings (an open text file works,
            so the log never has to be held in memory all at once).

    Returns:
        A dict mapping each status-code string (e.g. '404') to a list of
        (ip, url) tuples, in the order the lines were seen.  Lines that do
        not match the pattern are skipped.
    """
    errors = {}
    for line in lines:
        mo = _ERROR_LINE.search(line)
        if mo is None:
            continue
        ip, url, err = mo.groups()
        errors.setdefault(err, []).append((ip, url))
    return errors


def main(argv=None):
    """Parse a log file and plot how many requests hit each 4xx status.

    Args:
        argv: command-line style argument list; argv[1] is the log file
            name.  When it is omitted or too short, the file name is read
            interactively with input().

    Side effects:
        Prints a blank line, writes the plot to 'lengths.png' in the
        current directory, and opens the matplotlib window.

    Raises:
        OSError: if the log file cannot be opened.
    """
    # None instead of a list literal: avoids a mutable default argument
    # while keeping the same behaviour for main().
    if argv is None:
        argv = [__name__]

    if len(argv) < 2:
        filename = input('filename? ')
    else:
        filename = argv[1]

    # Iterate the file handle directly rather than readlines().
    with open(filename, 'r') as h:
        errors = _collect_errors(h)

    print()
    # One count per status code, ordered by the code string.
    lengths = [len(errors[key]) for key in sorted(errors)]

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(lengths)
    plt.savefig('lengths.png')
    plt.show()


# Script entry point: run main() with the real command line and hand its
# return value to the interpreter as the process exit status.
if __name__ == '__main__':
    import sys

    exit_status = main(sys.argv)
    sys.exit(exit_status)