regex2.py

This script parses the lines in an error-log file, error.log.1. From each line of the file, it extracts the month, day, and year; the type of message; an optional IP address; and the error message.

Decimal-to-Binary main()

# -*- coding: utf-8 -*-
"""
Created on Tue Mar 23 09:15:46 2021

@author: rmontant
"""
import sys
import re
import collections

def main(argv=None):
    """Summarize the entries of an error log by date.

    Each line of the log is matched against a pattern of the form
    ``[ddd Mon DD HH:MM:SS YYYY] [type] [client a.b.c.d] message``, where
    the ``[client ...]`` part is optional.  For every matching line the
    ``(type, ip)`` pair is stored under a ``(year, month, day)`` key; ``ip``
    is ``None`` when the line carries no client address.  Lines that do not
    match are echoed to stdout between ``-->`` and ``<--`` markers.  After
    the whole file has been read, one line per date is printed with the
    number of matching entries for that date, followed by a blank line.

    Args:
        argv: Command-line style argument list; ``argv[1]`` is the path of
            the log file.  When omitted it defaults to ``[__name__]``, and
            whenever fewer than two items are present the file name is
            requested interactively.

    Returns:
        None.
    """
    # Build the default per call instead of sharing one mutable list object
    # across calls.
    if argv is None:
        argv = [__name__]

    if len(argv) < 2:
        filename = input('What file? ')
    else:
        filename = argv[1]

    # Pieces of one log line, joined below with optional whitespace.
    ts_pat = r'\[... ([-A-Z][a-z]{2}) (\d\d) \d\d:\d\d:\d\d (\d{4})\]'
    type_pat = r'\[(\w+)\]'  # --> [crit]  or [error]
    ip_pat = r'(\[client (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\])?'
    msg_pat = r'(.*)'
    line_re = re.compile(r'\s*'.join([ts_pat, type_pat, ip_pat, msg_pat]))

    entries_by_date = {}
    # errors='replace': log messages can contain arbitrary bytes; a single
    # undecodable byte should not abort the whole summary.
    with open(filename, 'r', errors='replace') as log_file:
        # Iterate the file lazily rather than loading every line up front.
        for line in log_file:
            mo = line_re.match(line)
            if mo is None:
                # Echo unparseable lines so they are not silently dropped.
                print('-->', line, '<--')
                continue
            # Groups in pattern order; the bracketed client text and the
            # message are captured by the pattern but not used here.
            month, day, year, kind, _client, ip, _message = mo.groups()
            entries_by_date.setdefault((year, month, day), []).append((kind, ip))

    for date, values in entries_by_date.items():
        print(date, len(values))
    print()
#--------

if __name__ == '__main__':
    # Run only when executed as a script; main()'s return value becomes
    # the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)
#--------