You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
smart-top/smart/logger/__init__.py

77 lines
3.2 KiB
Python

"""
This module parses authentication logs (auth.log) to look for signs of intrusion and provides helpers to summarize the parsed entries
"""
import pandas as pd
import numpy as np
import transport
import datetime
import io
import json
import re
from datetime import datetime
#
# Timestamp prefix of a log line, e.g. "Jan 12 10:45:03" (or "Feb  5 10:45:03", because
# traditional syslog pads single-digit days with a space).
# Five groups are captured, in order: month, day, hour, minute, second.
# Each two-digit field is captured as a whole; a quantifier placed outside the group,
# i.e. "([0-9]){2}", would only retain the last digit of the field.
#
_date = r"(^[A-Z][a-z]{2}) +([0-9]{1,2}) ([0-9]{2}):([0-9]{2}):([0-9]{2})"
# Dotted-quad IPv4 address (octet ranges are not validated)
_ip = r"\d+\.\d+\.\d+\.\d+"
#
# Patterns applied to every log line; 'columns' names the captured groups in order.
# NOTE(review): the user class "[a-z,0-9]" also accepts a literal comma -- presumably
# "[a-z0-9]" was intended; left unchanged so the set of matched lines stays the same.
#
_regex = {
    'login': {
        'pattern': f'{_date} .*Accepted password for ([a-z]+) from ({_ip})',
        'columns': ['month', 'day', 'hour', 'minute', 'second', 'user', 'ip'],
    },
    'attacks': {
        'pattern': f'{_date} .*Invalid user ([a-z,0-9]+) from ({_ip})',
        'columns': ['month', 'day', 'hour', 'minute', 'second', 'user', 'ip'],
    },
    # -- accounts at risk
    'risk': {
        'pattern': f'{_date} .*Failed password for ([a-z,0-9]+) from ({_ip})',
        'columns': ['month', 'day', 'hour', 'minute', 'second', 'user', 'ip'],
    },
}
# Month abbreviation -> month number
_map = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
        'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}
def risk(_content, _id='user'):
    """
    Summarize parsed log records per value of the field _id (an account or an ip address).
    For every distinct value, the summary holds the earliest date, the latest date and the
    number of records, i.e. the number of attempts within that time frame.

    :_content   list of records (dictionaries) that each hold a 'date' and the field _id
    :_id        name of the field to group by (default 'user')
    :return     data-frame with columns start_date, end_date, count and _id, one row per
                distinct value of _id; start_date and end_date are converted to strings.
                An empty data-frame with these columns is returned when _content is empty
    """
    _columns = ['start_date', 'end_date', 'count', _id]
    _df = pd.DataFrame(_content)
    if _df.empty:
        #
        # Nothing to group: grouping an empty frame by a missing column would raise a KeyError
        return pd.DataFrame(columns=_columns)
    #
    # Aggregating the date column directly avoids reading the grouping column from within
    # groupby(...).apply, which recent pandas releases have deprecated
    _dates = _df.groupby(_id)['date']
    _summary = pd.DataFrame({
        'start_date': _dates.min(),
        'end_date': _dates.max(),
        'count': _dates.size(),
    })
    _summary[_id] = _summary.index
    _summary = _summary.reset_index(drop=True)
    _summary['start_date'] = _summary['start_date'].astype(str)
    _summary['end_date'] = _summary['end_date'].astype(str)
    return _summary[_columns]
def attacks(_content):
    """
    Count break-in attempts per source ip address by delegating to risk with 'ip' as the
    grouping field. When the host sits behind a load balancer the ip itself can be ignored,
    the counts then simply reflect the break-in attempts.

    :_content   list of parsed records passed through to risk
    """
    _by_ip = risk(_content, _id='ip')
    return _by_ip
def login(_content):
    """
    Summarize records per account by delegating to risk with 'user' as the grouping field

    :_content   list of parsed records passed through to risk
    """
    _by_user = risk(_content, _id='user')
    return _by_user
def read(**_args):
    """
    Parse a log file and collect the lines matched by the patterns declared in _regex

    :path   path of the auth.log file to load (required)
    :year   year assigned to every record, because the timestamp matched by the patterns
            carries no year; defaults to the current year
    :return dictionary keyed by pattern name (the keys of _regex); a key is only present if
            at least one line matched its pattern. Each value is a list of records holding
            the captured columns (date/time fields converted to integers) and a 'date'
            datetime built from them
    """
    _year = _args.get('year', datetime.now().year)
    _path = _args['path']
    r = {}
    #
    # The file is streamed line by line and closed even if parsing fails.
    # errors='replace' keeps undecodable bytes (log lines may contain arbitrary remote input)
    # from aborting the whole parse
    with open(_path, errors='replace') as f:
        for line in f:
            line = line.rstrip('\n')
            for _id, _spec in _regex.items():
                _out = re.search(_spec['pattern'], line)
                if not _out:
                    continue
                _records = r.setdefault(_id, [])
                try:
                    _object = dict(zip(_spec['columns'], _out.groups()))
                    #
                    # Month abbreviations are translated to month numbers; an unknown
                    # abbreviation is left as is and rejected by int() below
                    _object['month'] = _map.get(_object['month'], _object['month'])
                    for field in ('day', 'month', 'hour', 'minute', 'second'):
                        _object[field] = int(_object[field])
                    _object['date'] = datetime(
                        year=_year, month=_object['month'], day=_object['day'],
                        hour=_object['hour'], minute=_object['minute'], second=_object['second'])
                except (KeyError, TypeError, ValueError) as e:
                    #
                    # Best effort: a line with an invalid date/time is reported and skipped
                    print(e)
                    continue
                _records.append(_object)
    #
    # At this point we have essential information formatted
    # Summarizing this information will serve as a means to compress it
    #
    return r