"""
This file looks into the logs to determine if there is any intrusion, or
provides the means to assess logs.

It parses sshd entries of an ``auth.log`` file (traditional syslog timestamp,
e.g. ``Jan  5 10:45:07``) into records and summarizes them per user or per ip.
"""
import pandas as pd
import numpy as np
try:
    import transport
except ImportError:
    # NOTE(review): nothing visible in this module references `transport`;
    # the import is kept for backward compatibility but made optional so the
    # parser can still be loaded where the package is not installed.
    transport = None
import datetime
import io
import json
import re
# NOTE: this import must stay after `import datetime`; from here on the name
# `datetime` refers to the class, which is what the code below relies on.
from datetime import datetime

# Traditional syslog timestamp: month abbreviation, day (space padded when it
# has a single digit, hence " +" and {1,2}), then HH:MM:SS.
# Groups: month, day, hour, minute, second.
_date = r"^([A-Z][a-z]{2}) +([0-9]{1,2}) ([0-9]{2}):([0-9]{2}):([0-9]{2})"
_ip = r"\d+\.\d+\.\d+\.\d+"
# Account names are matched as any run of non-blank characters so that names
# with digits, dashes, dots, underscores or capitals are not dropped.
_user = r"\S+"
_columns = ['month', 'day', 'hour', 'minute', 'second', 'user', 'ip']
_regex = {
    'login': {'pattern': f'{_date} .*Accepted password for ({_user}) from ({_ip})',
              'columns': list(_columns)},
    'attacks': {'pattern': f'{_date} .*Invalid user ({_user}) from ({_ip})',
                'columns': list(_columns)},
    # -- accounts at risk
    'risk': {'pattern': f'{_date} .*Failed password for ({_user}) from ({_ip})',
             'columns': list(_columns)},
}
_map = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
        'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}


def risk(_content, _id='user'):
    """
    Summarize parsed log records per value of the `_id` field.

    This is meant to indicate the risk associated with accounts: the number of
    failed password attempts observed in a given time frame.

    :_content   records (list of dict, as produced by `read`) holding at least
                the `_id` field and a `date` field
    :_id        name of the field to group by ('user' or 'ip')
    :return     data-frame with columns start_date, end_date, count and `_id`
                (one row per distinct `_id` value, dates rendered as strings);
                an empty frame with these columns when there are no records
    """
    _names = ['start_date', 'end_date', 'count', _id]
    _df = pd.DataFrame(_content)
    if _df.empty:
        # Nothing to group: grouping an empty frame would fail on the missing column
        return pd.DataFrame(columns=_names)
    _g = _df.groupby(_id).agg(
        start_date=('date', 'min'),
        end_date=('date', 'max'),
        # 'size' counts the rows of the group; the grouping column itself
        # is not selectable here, so the count is taken on `date`
        count=('date', 'size'),
    )
    _out = _g.reset_index()
    _out['start_date'] = _out['start_date'].astype(str)
    _out['end_date'] = _out['end_date'].astype(str)
    return _out[_names]


def attacks(_content):
    """
    This function will compute counts associated with a given set of ip addresses.
    If behind a load balancer the IP can be ignored and the counts will reflect
    break-in attempts.

    :_content   records as produced by `read` (e.g. the 'attacks' entry)
    :return     see `risk`, grouped by 'ip'
    """
    return risk(_content, 'ip')


def login(_content):
    """
    Summarize records per user account.

    :_content   records as produced by `read` (e.g. the 'login' entry)
    :return     see `risk`, grouped by 'user'
    """
    return risk(_content, 'user')


def _to_record(_names, _values, _year):
    """
    Build one record from the groups captured on a log line: numeric date/time
    fields are converted to int and a `date` (datetime) field is added.
    Raises ValueError when a field is not numeric or the date is not valid.
    """
    _object = dict(zip(_names, _values))
    _object['month'] = _map.get(_object['month'], _object['month'])
    for field in ['day', 'month', 'hour', 'minute', 'second']:
        _object[field] = int(_object[field])
    _object['date'] = datetime(
        year=_year, month=_object['month'], day=_object['day'],
        hour=_object['hour'], minute=_object['minute'], second=_object['second'])
    return _object


def read(**_args):
    """
    Parse an auth.log file into records grouped by kind of event.

    :path   path of the auth.log file to load
    :year   year to assign to the entries (the syslog timestamp carries none),
            defaults to the current year
    :return dictionary keyed by event kind ('login', 'attacks', 'risk'; a key
            is only present if at least one line matched), each holding a list
            of records with the fields month, day, hour, minute, second, user,
            ip and date
    """
    _year = _args['year'] if 'year' in _args else datetime.now().year
    _path = _args['path']
    # Compile once instead of looking every pattern up for every line
    _parsers = [(_id, re.compile(_spec['pattern']), _spec['columns'])
                for _id, _spec in _regex.items()]
    r = {}
    # errors='replace': a log can carry arbitrary bytes sent by a remote peer,
    # these must not abort the parsing of the whole file
    with open(_path, errors='replace') as f:
        for line in f:
            for _id, _pattern, _names in _parsers:
                _out = _pattern.search(line)
                if not _out:
                    continue
                r.setdefault(_id, [])
                try:
                    _object = _to_record(_names, _out.groups(), _year)
                except (ValueError, TypeError, OverflowError) as e:
                    # best effort: report the entry that can not be converted and move on
                    print(e)
                    continue
                r[_id].append(_object)
    #
    # At this point we have essential information formatted
    # Summarizing this information will serve as a means to compress it
    #
    return r