commit 1a9c4b6630 (parent d3c0ae08be)
@@ -1,134 +0,0 @@
h="""
    This is a data-collector client that is intended to perform data-collection operations and submit them to an endpoint

    @required:
        - key   application/service key
        - id    node identifier

    usage:
        python data-collector.py --path config.json

    The configuration file is structured as a JSON object as follows:
        {
            "id"     : "<identifier>",
            "key"    : "<customer's identification key>",
            "api"    : "http://localhost/monitor/1/client",
            "folders": []
        }

    NOTE: You can download a sample configuration file from https://the-phi.com/smart-top
"""
from utils.params import PARAMS as SYS_ARGS, Logger
import os
import requests
import json
# from threading import Thread, RLock
from monitor import Apps, Folders
import time
from datetime import datetime

class Collector :
    def __init__(self) :
        """
            The configuration file is passed to the class for basic initialization of variables
        """
        self.httpclient = requests.Session()

    def init(self):
        # if 'folders' not in SYS_ARGS :
        #     #
        #     # If nothing is set it will monitor the temporary directory
        #     self.locations = [os.environ[name] for name in ['TEMP','TMP','TMPDIR'] if name in os.environ]
        # else:
        #     self.locations = SYS_ARGS['folders']

        #
        # -- let's get the list of features we are interested in
        url     = SYS_ARGS['api']+'/1/client/login'
        key     = SYS_ARGS['key']
        self.id = SYS_ARGS['id'] if 'id' in SYS_ARGS else os.environ['HOSTNAME']
        headers = {"key":key,"id":self.id}
        #
        # -- what features are allowed
        r = self.httpclient.post(url,headers=headers)
        if r.status_code == 200 :
            r = r.json()
            self.features = r['features']
            self.config   = r['config'] #-- contains apps and folders
            Logger.log(action="login",value=r)
        else:
            self.features = None
            self.config   = None
            Logger.log(action='login',value='error')

    def callback(self,channel,method,header,stream):
        pass

    def listen(self):
        factory = DataSourceFactory() #-- NOTE: requires utils.transport to be imported
        # self.qlistener = factory.instance(type="QueueListener",args=_args)
        # self.qlistener.callback = self.callback
        # self.qlistener.init(SYS_ARGS['id'])

    def post(self,**args) :
        """
            This function will post data to the endpoint
        """
        url     = SYS_ARGS['api']+'/1/client/log'
        key     = SYS_ARGS['key']
        id      = SYS_ARGS['id'] if 'id' in SYS_ARGS else os.environ['HOSTNAME']
        headers = {"key":key,"id":id,"context":args['context'],"content-type":"application/json"}
        body    = args['data'].fillna('').to_json(orient='records')
        if args['data'].shape[0] > 0 :
            r = self.httpclient.post(url,headers=headers,data=body)
            Logger.log(action="post."+args['context'],value=r.status_code)
        else:
            Logger.log(action="data.error",value="no data :: "+args['context'])

    def run(self):
        """
            This function will execute the basic functions to monitor folders and apps running on the system,
            given the configuration specified on the server.
        """
        while True :
            try:
                self.init()
                ELAPSED_TIME = 60*5 #-- default (5 min), so the sleep below is defined even when login fails
                if self.config and self.features :
                    ELAPSED_TIME = 60* int(self.features['schedule'].replace("min","").strip())
                    if 'apps' in self.config :
                        self.post( data=(Apps(node=self.id)).get(filter=self.config['apps']),context="apps")
                    if 'folders' in self.config and self.config['folders'] :
                        folder = Folders(node=self.id)
                        f = folder.get(path=self.config['folders'])
                        self.post(data=f,context="folders")
                Logger.log(action='sleeping',value=ELAPSED_TIME)
                #
                # In case no configuration is provided, the system will simply fall asleep and wait
                # @TODO: Evaluate whether to wake up the system or not (security concerns)!
                #
                time.sleep(ELAPSED_TIME)
            except Exception,e:
                Logger.log(action='error',value=e.message)
                print e
                # break

if __name__ == '__main__' :
    if 'path' in SYS_ARGS :
        path = SYS_ARGS['path']
        f = open(path)
        SYS_ARGS = json.loads(f.read())
        f.close()
        Logger.init('data-collector')
        collector = Collector()
        collector.run()
    else:
        print (h)
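For reference, a minimal config.json consistent with the docstring above could look like the sketch below. All values are placeholders; note that the code builds its endpoints by appending '/1/client/...' to the api value, so api should point at the service base URL.

    {
        "id"     : "node-01",
        "key"    : "<application/service key>",
        "api"    : "http://localhost/monitor",
        "folders": ["/tmp"]
    }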
@@ -1,174 +0,0 @@
"""
    Steve L. Nyemba <steve@the-phi.com>
    The Phi Technology - Smart Top

    This program is the core for evaluating folders and applications. Each class is specialized to generate a report in a pandas data-frame.
    The classes will focus on Apps, Folders and Protocols
        - SmartTop.get(**args)
    @TODO:
        Protocols (will be used in anomaly detection)
"""
from __future__ import division
import os
import subprocess
import numpy as np
import sys
import pandas as pd
import datetime

class SmartTop:
    def __init__(self,**args):
        self.node = args['node']
    def get(self,**args):
        return None

class Apps(SmartTop) :
    def __init__(self,**args):
        """
            This class will process a system command and parse the output accordingly given a parser
            @param parse    is a parser pointer
        """
        SmartTop.__init__(self,**args)
        self.cmd = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'"
        self.xchar = ';'

    def get_app(self,stream):
        index = 1 if os.path.exists(" ".join(stream[:1])) else len(stream)-1
        cmd = " ".join(stream[:index]) if index > 0 else " ".join(stream)
        if ' ' in cmd.split('/')[len(cmd.split('/'))-1] :
            p = cmd.split('/')[len(cmd.split('/'))-1].split(' ')
            name = p[0]
            args = " ".join(p[1:])
        else:
            name = cmd.split('/')[len(cmd.split('/'))-1]
            args = " ".join(stream[index:]) if index > 0 else ""
        return [name,cmd,args]

    def to_pandas(self,m):
        """
            This function will convert the output of ps to a data-frame
            @param m    raw matrix i.e. list of values like a csv
        """
        d = datetime.datetime.now().strftime('%m-%d-%Y')
        t = datetime.datetime.now().strftime('%H:%M:%S')
        m = [item for item in m if len(item) != len(m[0])]
        m = "\n".join(m[1:])
        df = pd.read_csv(pd.compat.StringIO(m),sep=self.xchar)
        df['date'] = np.repeat(d,df.shape[0])
        df['time'] = np.repeat(t,df.shape[0])
        df['node'] = np.repeat(self.node,df.shape[0])
        df.columns = ['pid','user','mem','cpu','status','started','name','cmd','args','date','time','node']
        return df

    def empty(self,name):
        return pd.DataFrame([{"pid":None,"user":None,"mem":0,"cpu":0,"status":"X","started":None,"name":name,"cmd":None,"args":None,"date":None,"time":None,"node":self.node}])

    def parse(self,rows):
        m = []
        TIME_INDEX = 5
        ARGS_INDEX = 6
        for item in rows :
            if rows.index(item) != 0 :
                parts = item.split(self.xchar)
                row = parts[:TIME_INDEX]
                row.append(' '.join(parts[TIME_INDEX:ARGS_INDEX]))
                row += self.get_app(parts[ARGS_INDEX:])
            else:
                row = item.split(self.xchar)
            row = (self.xchar.join(row)).strip()
            if len(row.replace(";","")) > 0 :
                m.append(row)
        return m

    def get(self,**args):
        """
            This function returns the output of the ps command piped into the class.
            The output is stored in a data frame with the columns assigned in to_pandas
        """
        try:
            handler = subprocess.Popen(self.cmd,shell=True,stdout=subprocess.PIPE)
            stream = handler.communicate()[0]
            rows = stream.split('\n')
            df = self.to_pandas(self.parse(rows))
            r = pd.DataFrame()
            if 'filter' in args :
                pattern = "|".join(args['filter'])
                i = df.cmd.str.contains(pattern)
                r = df[i].copy()
                r.index = np.arange(0,r.shape[0])
                ii = (1 + np.array(i)*-1) == 1 #-- complement of the filter mask (rows not matching any filter)
                other = pd.DataFrame(df[ii].sum()).T.copy()
                other.index = np.arange(0,other.shape[0])
                other.user = other.name = other.status = other.cmd = other.args = 'other'
                other.started = other.pid = -1
                other = other[other.columns[1:]]
                for name in args['filter'] :
                    i = r.cmd.str.contains(str(name.strip()),case=False,na=False)
                    if i.sum() == 0:
                        r = r.append(self.empty(name),sort=False)
                    else :
                        # r[i].update(pd.DataFrame({"name":np.repeat(name,r.shape[0])}))
                        r.loc[i, 'name'] = np.repeat(name,i.sum())
                        # r.loc[i].name = name
                r = r.append(other,sort=False)
                r.index = np.arange(r.shape[0])
            return r
        except Exception,e:
            print (e)
            return None


class Folders(SmartTop):
    """
        This class will assess a folder and produce a report in a data-frame that can later on be used for summary statistics
    """
    def __init__(self,**args):
        SmartTop.__init__(self,**args)

    def _get(self,dir_path,r=None):
        if r is None :
            r = [] #-- avoids the shared mutable default argument, which leaked rows across calls
        for child in os.listdir(dir_path):
            path = os.path.join(dir_path, child)
            if os.path.isdir(path):
                self._get(path,r)
            else:
                size = os.path.getsize(path)
                file_date = os.path.getatime(path)
                file_date = datetime.datetime.fromtimestamp(file_date)
                now = datetime.datetime.now()
                age = (now - file_date).days
                name = os.path.basename(path)
                r.append({"name":name,"path":path,"size":size,"age":age,"date":now.strftime('%m-%d-%Y'),"time":now.strftime('%H:%M:%S'),"node":self.node })
        return r

    def get(self,**args):
        # path = args['path']
        if not isinstance(args['path'],list):
            paths = [args['path']]
        else:
            paths = args['path']
        _out = pd.DataFrame()
        for path in paths :
            name = os.path.basename(path)
            if os.path.exists(path) :
                #
                # If the folder does NOT exist it should not be processed.
                #
                rows = self._get(path)
                if len(rows) > 0 :
                    r = pd.DataFrame(rows)
                    r = pd.DataFrame([{"name":name,"path":path,"files":r.shape[0],"age_in_days":r.age.mean(),"size_in_kb":r['size'].sum(),"date":r.date.max(),"time":r.time.max(),"node":r.node.max()}])
                    _out = _out.append(r,sort=False)
        #
        # @TODO: The state of the hard-drive would be a good plus
        # os.system('df -h /')
        _out.index = np.arange(0,_out.shape[0])
        return _out
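A minimal usage sketch for the two classes above, assuming a POSIX host where the ps pipeline in Apps is available; the filter name and path are only illustrative:

    apps    = Apps(node='node-01')
    procs   = apps.get(filter=['firefox'])  # one row per matching process plus an 'other' rollup row
    folders = Folders(node='node-01')
    report  = folders.get(path=['/tmp'])    # one summary row per folder (files, age_in_days, size_in_kb, ...)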
@@ -1 +0,0 @@
@@ -1 +0,0 @@
@@ -1,368 +0,0 @@
"""
    This class is designed to be an actor class i.e. it will undertake certain actions given an event detected
    The platform has 2 main sections (detection & analysis).
    Action Types (Actors):
        - Alert : Sends an email or Webhook
        - Apps  : Kill, Start
        - Folder: Archive, Delete (all, age, size)
    By design we are to understand that a message is structured as follows:
        {to,from,content} with content either being an arbitrary stream (or JSON)
    @TODO:
        - upgrade to python 3.x
"""
import json
from threading import Thread
import os
import shutil
import subprocess
import re
from monitor import ProcessCounter
from utils.transport import QueueListener, QueueWriter, QueueReader
from utils.params import PARAMS
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from datetime import datetime
from StringIO import StringIO
from utils.services import Dropbox, Google

class Actor():
    @staticmethod
    def instance(name,args,logger=None):
        """
            This function is a factory method for all the instances of the subclasses of Actor
            @param name     name of the class to instantiate
            @param args     arguments to be passed in {configuration}
        """
        r = []
        if not isinstance(name,list):
            name = [name]
        for id in name :
            try:
                o = eval("".join([id,"()"]))
                o.Initialize(args,logger)
                r.append(o)
            except Exception,e:
                if logger is not None :
                    logger.log(subject='Actor',object='Factory',action='error',value=e.message)
                print str(e)
        return r[0] if len(r) == 1 else r

    def __init__(self):
        """
            Initializing the class with configuration. The configuration will be specific to each subclass
            @param args     arguments the class needs to be configured
        """
        pass

    def getName(self):
        return self.__class__.__name__.lower()

    # def getIdentifier(self):
    #     return self.__class__.__name__.lower()

    def Initialize(self,args,logger=None):
        self.config = args
        self.logger = logger

    def isValid(self,**item):
        return False

    def execute(self,cmd):
        stream = None
        try:
            # subprocess.call(cmd,shell=False)
            out = subprocess.Popen(cmd,stdout=subprocess.PIPE)
            # stream = out.communicate()[0]
        except Exception,e:
            pass

    def post(self,**args):
        pass

    def log(self,**args):
        if self.logger :
            args['subject'] = self.getName()
            self.logger.log(**args) #-- was self.logger.log(args); the logger expects keyword arguments

class Apps(Actor) :
    """
        This class is designed to handle applications, restarting them if need be.
        conf{app-name:{args}}
    """
    def __init__(self):
        Actor.__init__(self)
        # self.ng = None

    def isValid(self,**args):
        """
            We ensure that the provided application exists and that the payload is correct
            The class will only respond to reboot, kill and start actions
                p validates the payload
                q validates that the app can be restarted
            @NOTE: killing the application has no preconditions/requirements
        """
        params = args['params']
        action = args['action']
        p = len(set(params.keys()) & set(['cmd','label'])) == 2
        q = False
        r = action in ['reboot','kill','start']
        if p :
            q = os.path.exists(params['cmd'])
        return p and q and r

    def init(self,action,params) :
        """
            This function will initialize the actor with applications and associated arguments
            @param args {"apps_o":"","app_x":params}
        """
        self.action = action
        self.params = params
        self.log(action='init',object=action,value=params)

    def startup(self,cmd) :
        """
            This function is intended to start a program given the configuration
            @TODO We need to find the command in case the app has crashed
        """
        try:
            print ""
            print cmd
            os.system(cmd +" &")
            self.log(action='startup',value=cmd)
        except Exception, e:
            print e

    def kill(self,name) :
        """
            kill processes given the name. The function is not case sensitive and partial names are accepted
            @NOTE: Make sure the reference to the app is not ambiguous
        """
        try:
            args = "".join(['ps -eo pid,command|grep -Ei "',name.lower(),'"|grep -E "^ {0,}[0-9]+" -o|xargs kill -9'])
            # self.execute([args])
            subprocess.call([args],shell=True)
        except Exception,e:
            print e

    def run(self):
        __action = str(self.action).strip()
        __params = dict(self.params)
        pointer = None
        if __action == 'reboot' :
            def pointer():
                self.kill(__params['label'])
                self.startup(__params['cmd'])
        elif __action == 'kill':
            def pointer():
                self.kill(__params['label'])
        elif __action == 'start':
            def pointer() :
                self.startup(__params['cmd'])
        if pointer :
            thread = Thread(target=pointer)
            thread.start()
            # pointer()

    def analyze(self,logs) :
        """
            This function is designed to analyze a few logs and take appropriate action
            @param logs     logs of application/process data; folder analysis or sandbox analysis
        """
        pass
        # for item in logs :
        #     name = item['label']
        #     if self.can_start(name) :
        #         self.startup(name)

class Mailer (Actor):
    """
        This class is a mailer agent
        conf = {uid:<account>,host:<host>,port:<port>,password:<password>}
    """
    def __init__(self):
        Actor.__init__(self)

    def init(self,conf) :
        self.uid = conf['uid']
        try:
            self.handler = smtplib.SMTP_SSL(conf['host'],conf['port'])
            r = self.handler.login(self.uid,conf['password'])
            #
            # @TODO: Check the status of the authentication
            # If not authenticated the preconditions have failed
            #
        except Exception,e:
            print str(e)
            self.handler = None

    def send(self,**args) :
        subject = args['subject']
        message = args['message']
        to      = args['to']
        if '<' in message and '>' in message :
            message = MIMEText(message,'html')
        else:
            message = MIMEText(message,'plain')
        message['From']    = self.uid
        message['To']      = to
        message['Subject'] = subject
        return self.handler.sendmail(self.uid,to,message.as_string())

    def close(self):
        self.handler.quit()

class Folders(Actor):
    """
        This is designed to handle folders i.e. cleaning/archiving the folders.
        If the user does NOT have any keys to cloud-view then she will not be able to archive
            {threshold:value}
        @params threshold   in terms of size or age; it will be applied to all folders
    """
    def __init__(self):
        Actor.__init__(self)

    def init(self,action,params):
        """
            This is initialized with parameters from the plan.
            The parameters should be specific to the actor (folder), e.g.:
                folder_size
        """
        self.action = action
        # self.lfolders = args['folders'] #config['folders']
        # self.action = args['action'] #{clear,archive} config['actions']['folders']
        plan = params['plan']
        self.threshold = self.get_size(plan['folder_size']) #self.config['threshold'])
        self.params = params

    def isValid(self,**args):
        action = args['action']
        params = args['params']
        if not isinstance(action,list):
            action = [action] #-- the action can be passed as a single name or a list of names
        p = len(set(action) & set(['clean','archive','backup'])) > 0
        q = False
        r = False
        if p :
            q = len(set(params.keys()) & set(['label','folder'])) > 0
            if q :
                folder = params['label'] if 'label' in params else params['folder']
                r = os.path.exists(folder)
        return p and q and r

    def archive(self,item):
        """
            This function will archive all files in a given folder
            @pre : isValid
        """
        folder = item['label']
        name = folder.split(os.sep)
        name = name[len(name)-1]
        date = str(datetime.now()).replace(' ','@') #''.join([str(i) for i in item['date'].values()])
        # signature='-'.join([name,date,str(item['stats']['file_count']),'files'])
        signature = '-'.join([name,date])
        tarball = os.sep.join([folder,'..',signature])
        shutil.make_archive(tarball,'tar',folder)
        # self.clean(item)
        #
        # @TODO: The archive can be uploaded to the cloud or elsewhere
        # @param id     cloud service identifier {dropbox,box,google-drive,one-drive}
        # @param key    authorization key for the given service
        #
        return tarball+".tar"

    def backup(self,tarball):
        """
            This function will initiate a backup to the cloud given a tarball
        """
        if os.path.exists(tarball) :
            key = self.params['key']
            sid = self.params['user']['sid']
            if sid == 'dropbox' :
                cloud = Dropbox()
            elif sid == 'google-drive' :
                cloud = Google()
            cloud.init(key)
            file = open(tarball)
            out = cloud.upload('backup','application/octet-stream',file)
            file.close()
            print out
        else:
            print tarball
            print self.params['user']['sid']
            print self.params['key']

    def clean(self,item):
        """
            This function consists in deleting files from a given folder
        """
        rpath = item['label']
        files = os.listdir(item['label'])
        for name in list(files) :
            path = os.sep.join([item['label'],name])
            if os.path.isdir(path) :
                shutil.rmtree(path)
            else:
                os.remove(path)

    def get_size(self,value):
        """
            converts size values into KB and returns the value without units
        """
        units = {'MB':1000,'GB':1000000,'TB':1000000000} #-- converting to KB
        key = set(units.keys()) & set(re.split('(\d+)',value.replace(' ','').upper()))
        if len(key) == 0:
            return -1
        key = key.pop()
        return float(value.upper().replace(key,'').strip()) * units[key] #-- was replace('MB',''), which broke GB/TB values

    def can_clean(self,item):
        """
            This function returns whether the following hold:
                p : the folder exists
                q : the folder has reached the threshold
            @TODO: Add a user defined configuration element to make this happen
        """
        p = os.path.exists(item['label']) and item['label'] in self.lfolders
        q = item['stats']['size']['mean'] >= self.threshold and self.threshold > 0
        return p and q

    def run(self):
        tarball = None
        if 'archive' in self.action :
            tarball = self.archive(self.params)
        if 'backup' in self.action and tarball:
            self.backup(tarball)
        if 'delete' in self.action and self.can_clean(self.params): #-- the message params double as the folder payload
            self.clean(self.params)

    def analyze(self,logs):
        r = {'clean':self.clean,'archive':self.archive}
        self.lfolders = [ folder['label'] for folder in logs]
        # for item in logs :
        #     if self.can_clean(item) :
        #         self.archive(item)
        #         self.clean(item)
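As a sketch of how these actors are driven (the payload is hypothetical; its structure follows isValid/init above, and get_size normalizes plan sizes to KB, e.g. get_size('2 GB') returns 2000000.0 while get_size('10') returns -1 for a missing unit):

    actor   = Actor.instance('Apps',None)
    message = {"action":"reboot","params":{"label":"apache2","cmd":"/usr/sbin/apache2"}}
    if actor.isValid(action=message['action'],params=message['params']) :
        actor.init(message['action'],message['params'])
        actor.run()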
@@ -1,132 +0,0 @@
"""
    This file encapsulates a class that is intended to perform learning
"""
from __future__ import division
import os
import json
import numpy as np
from sklearn import linear_model
from threading import Thread,RLock
from datetime import datetime
from utils.transport import *
from utils.ml import AnomalyDetection,ML
from utils.params import PARAMS
import time

class BaseLearner(Thread):
    def __init__(self,lock) :
        Thread.__init__(self)
        path = PARAMS['path']
        self.name    = self.__class__.__name__.lower()
        self.rclass  = None
        self.wclass  = None
        self.rw_args = None
        if os.path.exists(path) :
            f = open(path)
            self.config = json.loads(f.read())
            f.close()
            self.rclass  = self.config['store']['class']['read']
            self.wclass  = self.config['store']['class']['write']
            self.rw_args = self.config['store']['args']
        else:
            self.config = None
        self.lock = lock
        self.factory = DataSourceFactory()
        self.quit = False

    def stop(self):
        """
            This function is designed to stop processing gracefully
        """
        self.quit = True

class Anomalies(BaseLearner) :
    """
        This class is intended to apply anomaly detection to various areas of learning
        The areas of learning that will be skipped are :
            ['_id','_rev','learn'] ...
        @TODO:
            - Find a way to perform dimensionality reduction if need be
    """
    def __init__(self,lock):
        BaseLearner.__init__(self,lock)
        if self.config :
            #
            # Initializing data store & factory class
            #
            self.id   = self.config['id']
            self.apps = self.config['procs'] if 'procs' in self.config else []
            self.rclass  = self.config['store']['class']['read']
            self.wclass  = self.config['store']['class']['write']
            self.rw_args = self.config['store']['args']
            # self.factory = DataSourceFactory()
            self.quit = False
            # self.lock = lock

    def format(self,stream):
        pass

    def stop(self):
        self.quit = True

    def run(self):
        DELAY = self.config['delay'] * 60
        reader = self.factory.instance(type=self.rclass,args=self.rw_args)
        data = reader.read()
        key = 'apps@'+self.id
        if key in data:
            rdata = data[key]
            features = ['memory_usage','cpu_usage']
            yo = {"1":["running"],"name":"status"}
            while self.quit == False :
                print ' *** ',self.name,' ',str(datetime.today())
                for app in self.apps:
                    print '\t',app,str(datetime.today()),' ** ',app
                    logs = ML.Filter('label',app,rdata)
                    if logs :
                        handler = AnomalyDetection()
                        value = handler.learn(logs,'label',app,features,yo)
                        if value is not None:
                            value = dict(value,**{"features":features})
                            value = dict({"id":self.id},**value)
                            # r[id][app] = value
                            self.lock.acquire()
                            writer = self.factory.instance(type=self.wclass,args=self.rw_args)
                            writer.write(label='learn',row=value)
                            self.lock.release()
                #
                if 'MONITOR_CONFIG_PATH' in os.environ :
                    break
                time.sleep(DELAY)
        print ' *** Exiting ',self.name.replace('a','A')

class Regression(BaseLearner):
    """
        Let's estimate how many files we will have for a given date
            y = ax + b    with x: date, y: number of files
    """
    def __init__(self,lock):
        BaseLearner.__init__(self,lock)
        self.folders = self.config['folders']
        self.id = self.config['id']

    def run(self):
        DELAY = self.config['delay'] * 60
        reader = self.factory.instance(type=self.rclass,args=self.rw_args)
        data = reader.read()
        if 'folders' in data :
            data = ML.Filter('id',self.id,data['folders'])
            xo = ML.Extract(['date'],data)
            yo = ML.Extract(['count'],data)
        pass
        # print np.var(xo,yo)

if __name__ == '__main__' :
    lock = RLock()
    thread = Anomalies(lock)
    thread.start()
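The configuration file read by BaseLearner (via PARAMS['path']) is expected to carry at least the fields below. The store class names and arguments here are placeholders; the actual values depend on what utils.transport's DataSourceFactory accepts:

    {
        "id"     : "node-01",
        "delay"  : 5,
        "procs"  : ["apache2"],
        "folders": ["/tmp"],
        "store"  : {
            "class": {"read": "<ReaderClass>", "write": "<WriterClass>"},
            "args" : {"uri": "http://localhost:5984", "dbname": "monitor"}
        }
    }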
@@ -1,220 +0,0 @@
"""
    Features :
        - data collection
        - detection, reboot (service)
        - respond to commands (service)
"""
from __future__ import division
# from threading import Thread, RLock
import os
import json
import time
from datetime import datetime
from utils.transport import *
import monitor
import requests
from threading import Thread

class Manager() :
    """
        delay : <value>
        limit : <value>
        scope : apps,folders,learner,sandbox
    """
    def version(self):
        return 1.1

    def __init__(self):
        self.factory = DataSourceFactory()

    def set(self,name,value):
        setattr(self,name,value) #-- was setattr(name,value), which is an invalid call

    def init(self,**args) :
        self.id     = args['node']
        self.agents = args['agents']
        self.config = dict(args['config'])
        self.key    = args['key']
        self.actors = args['actors']
        self.plan   = self.config['plan']
        self.DELAY  = int(self.plan['metadata']['delay'])
        self.host   = args['host']
        self.update() #-- Initializing status information
        _args = {"host":"dev.the-phi.com","qid":self.id,"uid":self.key}
        #
        # Connecting to the messaging service
        #
        self.qlistener = self.factory.instance(type="QueueListener",args=_args)
        self.qlistener.callback = self.callback
        self.qlistener.init(self.id)
        # self.qlistener.read()
        thread = Thread(target=self.qlistener.read)
        thread.start()

    def update(self) :
        """
            This method inspects the plans for the current account and makes sure it can/should proceed.
            The user must be subscribed to the service otherwise this is not going to work
        """
        url = "http://:host/monitor/init/collector".replace(':host',self.host)
        r = requests.post(url,headers={"key":self.key,"id":self.id})
        r = json.loads(r.text)
        # meta = [item['metadata'] for item in plans if item['status']=='active' ]
        self.plan = r['plan']
        meta = self.plan['metadata']
        if meta :
            self.DELAY = 60* int(meta['delay'])
            self.LIMIT = int(meta['limit'])
            # dbname = [item['name'] for item in plans if int(item['metadata']['limit']) == self.LIMIT][0]
            # self.config['store']['args']['dbname'] = dbname
        else:
            self.DELAY = -1
            self.LIMIT = -1
        # self.filter(meta)
        #
        # We are removing all that is not necessary i.e. making sure the features match the plan the user has paid for
        #
        self.agents = self.filter('agents',meta,self.agents)
        self.actors = self.filter('actors',meta,self.actors)
        self.setup(meta)

    # def filter_collectors(self,meta) :
    #     """
    #         remove collectors that are not specified by the plan
    #         Note that the agents (collectors) have already been initialized ?
    #     """
    #     values = meta['agents'].replace(' ','').split(',')
    #     self.agents = [agent for agent in self.agents if agent.getName() in values]

    # def filter_actors(self,meta):
    #     """
    #         removes actors that are NOT specified by the subscription plan
    #         Note that the actors have already been instantiated and need initialization
    #     """
    #     values = meta['actors'].replace(' ','').split('.')
    #     self.actors = [actor for actor in self.actors if actor.getName() in values]

    def filter(self,id,meta,objects):
        """
            This function filters the agents/actors given what is available in the user's plan
        """
        values = meta[id].replace(' ','').split(',')
        return [item for item in objects if item.getName() in values]

    def setup(self,meta) :
        # conf = {"folders":None,"apps":None}
        # read_class = self.config['store']['class']['read']
        # read_args  = self.config['store']['args']
        # args = None
        # couchdb = self.factory.instance(type=read_class,args=read_args)
        # args = couchdb.view('config/apps',key=self.key)
        # if len(args.keys()) > 0 :
        #     self.apply_setup('apps',args)
        # args = couchdb.view('config/folders',key=self.key)
        # if 'folder_size' not in meta :
        #     # args['threshold'] = meta['folder_size']
        #     self.apply_setup('folders',meta)
        # self.apply_setup('folders',meta)
        # @TODO: For now app actors don't need any particular initialization
        pass

    def apply_setup(self,name,args) :
        for actor in self.actors :
            if args is not None and actor.getName() == name and len(args.keys()) > 0:
                actor.init(args)

    def isvalid(self):
        self.update()
        return self.DELAY > -1 and self.LIMIT > -1

    def post(self,label,row) :
        """
            This function is designed to take appropriate action if a particular incident has been detected
            @param label    name of the node concerned
            @param row      data extracted/pulled
        """
        message = {}
        message['action'] = 'reboot'
        message['node'] = label

    def callback(self,channel,method,header,stream):
        """
            This function enables the manager to receive messages and delegate them to the appropriate actor
            @param stream   JSON payload of the message
        """
        message = json.loads(stream)
        #
        # we should inspect the message and ensure it has reached the appropriate recipient
        #
        if 'node' in message and message['node'] == self.id :
            action = message['action']
            params = message['params']
            # params['plan'] = self.plan['metadata']
            self.delegate(action,params)

    def delegate(self,action,params):
        for actor in self.actors :
            if actor.isValid(action=action,params=params) :
                actor.init(action,params)
                actor.run()
                break

    def run(self):
        # DELAY=35*60 #-- 35 Minutes
        # LIMIT=1000
        COUNT = 0
        COUNT_STOP = int(24*60/ self.DELAY)
        write_class = self.config['store']['class']['write']
        read_args   = self.config['store']['args']
        while True :
            COUNT += 1
            if COUNT > COUNT_STOP :
                if self.isvalid() :
                    COUNT = 0
                else:
                    break
            for agent in self.agents :
                data  = agent.composite()
                label = agent.getName()
                node  = '@'.join([label,self.id])
                row   = {}
                if label == 'folders':
                    row = [ dict({"id":self.id}, **_row) for _row in data]
                else:
                    # label = id
                    row = data
                #
                # @TODO:
                # This data should be marked if it has been flagged for reboot
                #
                if type(row) == list and len(row) == 0 :
                    continue
                index = self.agents.index(agent)
                if len(self.actors) > index and self.actors[index].getName() == agent.getName() :
                    actor = self.actors[index]
                    actor.analyze(row)
                # self.lock.acquire()
                store = self.factory.instance(type=write_class,args=read_args)
                store.flush(size=self.LIMIT)
                store.write(label=node,row=[row])
                # self.lock.release()
            print (["Falling asleep ",self.DELAY/60])
            time.sleep(self.DELAY)
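For illustration, a queue message that callback above would accept and delegate; the values are hypothetical, the keys are the ones the code reads:

    {
        "node"   : "node-01",
        "action" : "reboot",
        "params" : {"label": "apache2", "cmd": "/usr/sbin/apache2"}
    }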
@@ -1,32 +0,0 @@
import sys

PARAMS = {'context':''}
if len(sys.argv) > 1:
    N = len(sys.argv)
    for i in range(1,N):
        value = None
        if sys.argv[i].startswith('--'):
            key = sys.argv[i].replace('-','')
            PARAMS[key] = 1
            if i + 1 < N:
                value = sys.argv[i+1].strip()
            if key and value:
                PARAMS[key] = value
                if key == 'context':
                    PARAMS[key] = ('/'+value).replace('//','/')

import logging
import json
from datetime import datetime

class Logger :
    @staticmethod
    def init(filename):
        name = "-".join([filename,datetime.now().strftime('%d-%m-%Y')])+".log"
        logging.basicConfig(filename=name,level=logging.INFO,format="%(message)s")
    @staticmethod
    def log(**args) :
        args['date'] = datetime.now().strftime('%d-%m-%Y %H:%M:%S')
        logging.info(json.dumps(args))
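A quick sketch of the parsing behavior; the invocation below is hypothetical:

    # python data-collector.py --path config.json --context monitor
    # PARAMS -> {'context': '/monitor', 'path': 'config.json'}
    from utils.params import PARAMS, Logger
    Logger.init('data-collector')
    Logger.log(action='start',value=PARAMS) #-- one JSON object per log line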
@@ -1,598 +0,0 @@
|
|||||||
"""
|
|
||||||
CloudView Engine 2.0
|
|
||||||
The Phi Technology LLC - Steve L. Nyemba <steve@the-phi.com>
|
|
||||||
|
|
||||||
This is a basic cloud view engine that is designed to be integrated into any service and intended to work for anyone provided they have signed up with the cloud service provider
|
|
||||||
The intent is to make the engine a general purpose engine that can be either deployed as a service (3-launchpad) or integrated as data feed for a third party utility
|
|
||||||
|
|
||||||
"""
|
|
||||||
from __future__ import division
|
|
||||||
from threading import Thread
|
|
||||||
import json
|
|
||||||
import requests
|
|
||||||
from xmljson import yahoo as bf
|
|
||||||
from xml.etree.ElementTree import Element, tostring, fromstring, ElementTree as ET
|
|
||||||
import xmltodict
|
|
||||||
from email.mime.base import MIMEBase
|
|
||||||
from email.mime.multipart import MIMEMultipart
|
|
||||||
from StringIO import StringIO
|
|
||||||
class Cloud:
|
|
||||||
BYTES_TO_GB = 1000000000
|
|
||||||
Config = None
|
|
||||||
STREAMING_URI = None
|
|
||||||
@staticmethod
|
|
||||||
def instance(id,**args):
|
|
||||||
id = id.strip()
|
|
||||||
if id == 'skydrive' :
|
|
||||||
id = 'one-drive'
|
|
||||||
|
|
||||||
handler = None
|
|
||||||
path = args['path'] if 'path' in args else None
|
|
||||||
if not Cloud.Config and path:
|
|
||||||
f = open(path)
|
|
||||||
Cloud.Config = json.loads(f.read())
|
|
||||||
Cloud.STREAMING_URI = str(Cloud.Config['api'])
|
|
||||||
Cloud.Config = Cloud.Config['cloud']
|
|
||||||
f.close()
|
|
||||||
if path and id in Cloud.Config :
|
|
||||||
context = Cloud.Config[id]
|
|
||||||
className = context['class']
|
|
||||||
config = json.dumps(context['config'])
|
|
||||||
handler = eval( "".join([className,"(",config,")"]))
|
|
||||||
|
|
||||||
#
|
|
||||||
# In case a stream was passed in ...
|
|
||||||
#
|
|
||||||
if 'stream' in args:
|
|
||||||
stream = args['stream']
|
|
||||||
context = Cloud.Config[id]
|
|
||||||
className = context['class']
|
|
||||||
|
|
||||||
handler = eval("".join([className,"(None)"]))
|
|
||||||
handler.from_json(stream)
|
|
||||||
#
|
|
||||||
# Once the handler is rovided we must retrieve the service given the key
|
|
||||||
# The key provides information about what files to extract as well as the preconditions
|
|
||||||
# @TODO:
|
|
||||||
# - Keys are maintained within the stripe account/couchdb
|
|
||||||
# -
|
|
||||||
return handler
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.access_token = None
|
|
||||||
self.refresh_token= None
|
|
||||||
self.files = []
|
|
||||||
self.client_id = None
|
|
||||||
self.secret = None
|
|
||||||
self.mfiles = {}
|
|
||||||
self.folders={}
|
|
||||||
|
|
||||||
def to_json(self):
|
|
||||||
object = {}
|
|
||||||
keys = vars(self)
|
|
||||||
for key in keys:
|
|
||||||
value = getattr(self,key)
|
|
||||||
object[key] = value
|
|
||||||
return json.dumps(object)
|
|
||||||
def from_json(self,stream):
|
|
||||||
ref = json.loads(stream) ;
|
|
||||||
for key in ref.keys() :
|
|
||||||
value = ref[key]
|
|
||||||
setattr(self,key,value)
|
|
||||||
# self.access_token = ref['access_token']
|
|
||||||
# self.refesh_token = ref['refresh_token']
|
|
||||||
# self.files = ref['files']
|
|
||||||
"""
|
|
||||||
This function matches a name with a list of possible features/extensions
|
|
||||||
|
|
||||||
"""
|
|
||||||
def match(self,filename,filters):
|
|
||||||
if isinstance(filters,str):
|
|
||||||
filters = [filters]
|
|
||||||
return len(set(filename.lower().split('.')) & set(filters)) > 0
|
|
||||||
|
|
||||||
def getName(self):
|
|
||||||
return self.__class__.__name__.lower()
|
|
||||||
def get_authURL(self):
|
|
||||||
|
|
||||||
config = Cloud.Config[self.getName()]['config']
|
|
||||||
url = config['authURL']
|
|
||||||
|
|
||||||
if '?' in url == False:
|
|
||||||
url += '?'
|
|
||||||
keys=['client_id','redirect_uri']
|
|
||||||
p = []
|
|
||||||
for id in keys:
|
|
||||||
value = config[id]
|
|
||||||
p.append(id+'='+value)
|
|
||||||
url = url +"&"+ "&".join(p)
|
|
||||||
|
|
||||||
return url
|
|
||||||
Cloud.Config = {}
|
|
||||||
|
|
||||||
class Google(Cloud):
|
|
||||||
def __init__(self,conf=None):
|
|
||||||
Cloud.__init__(self)
|
|
||||||
def getName(self):
|
|
||||||
return 'google-drive'
|
|
||||||
def init(self,token):
|
|
||||||
self.refresh_token = token
|
|
||||||
self._refresh()
|
|
||||||
|
|
||||||
def _refresh(self,code=None):
|
|
||||||
url = "https://accounts.google.com/o/oauth2/token"
|
|
||||||
headers = {"Content-Type":"application/x-www-form-urlencoded"}
|
|
||||||
data = {"client_id":self.client_id,"client_secret":self.secret}
|
|
||||||
if code :
|
|
||||||
grant_type = 'authorization_code'
|
|
||||||
data['code'] = code
|
|
||||||
else:
|
|
||||||
data['refresh_token'] = self.refresh_token
|
|
||||||
grant_type = 'refresh_token'
|
|
||||||
|
|
||||||
data['grant_type'] = grant_type
|
|
||||||
data['redirect_uri'] = self.redirect_uri
|
|
||||||
|
|
||||||
resp = requests.post(url,headers=headers,data=data)
|
|
||||||
r = json.loads(resp.text)
|
|
||||||
if 'access_token' in r:
|
|
||||||
self.access_token = r['access_token']
|
|
||||||
self.refresh_token = r['refresh_token'] if 'refresh_token' in r else r['access_token']
|
|
||||||
self.id_token = r['id_token']
|
|
||||||
|
|
||||||
|
|
||||||
def create_file(self,**args):
|
|
||||||
url = "https://www.googleapis.com/upload/drive/v2/files" ;
|
|
||||||
headers = {"Authorization":"Bearer "+self.access_token}
|
|
||||||
headers['Content-Type'] = args['mimetype']
|
|
||||||
params = args['params']
|
|
||||||
if 'data' not in args :
|
|
||||||
r = requests.post(url,params = params,headers=headers)
|
|
||||||
else:
|
|
||||||
data = args['data']
|
|
||||||
r = requests.post(url,data=data,params = params,headers=headers)
|
|
||||||
return r.json()
|
|
||||||
def update_metadata(self,id,metadata) :
|
|
||||||
url = "https://www.googleapis.com/drive/v2/files"
|
|
||||||
headers = {"Authorization":"Bearer "+self.access_token}
|
|
||||||
headers['Content-Type'] = 'application/json; charset=UTF-8'
|
|
||||||
|
|
||||||
if id is not None :
|
|
||||||
url += ("/"+id)
|
|
||||||
r = requests.put(url,json=metadata,headers=headers)
|
|
||||||
else:
|
|
||||||
# url += ("/?key="+self.secret)
|
|
||||||
r = requests.post(url,data=json.dumps(metadata),headers=headers)
|
|
||||||
|
|
||||||
return r.json()
|
|
||||||
|
|
||||||
def upload(self,folder,mimetype,file):
|
|
||||||
"""
|
|
||||||
This function will upload a file to a given folder and will provide
|
|
||||||
If the folder doesn't exist it will be created otherwise the references will be fetched
|
|
||||||
This allows us to avoid having to create several folders with the same name
|
|
||||||
"""
|
|
||||||
r = self.get_files(folder)
|
|
||||||
|
|
||||||
if len(r) == 0 :
|
|
||||||
info = {"name":folder, "mimeType":"application/vnd.google-apps.folder"}
|
|
||||||
r = self.update_metadata(None,{"name":folder,"title":folder, "mimeType":"application/vnd.google-apps.folder"})
|
|
||||||
else:
|
|
||||||
r = r[0]
|
|
||||||
parent = r
|
|
||||||
parent = {"kind":"drive#file","name":folder,"id":parent['id'],"mimeType":"application/vnd.google-apps.folder"}
|
|
||||||
|
|
||||||
|
|
||||||
r = self.create_file(data=file.read(),mimetype=mimetype,params={"uploadType":"media"})
|
|
||||||
info = {"title":file.filename,"description":"Create by Cloud View"}
|
|
||||||
info['parents'] = [parent]
|
|
||||||
|
|
||||||
r = self.update_metadata(r['id'],metadata=info)
|
|
||||||
return r
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
This class is designed to allow users to interact with one-drive
|
|
||||||
"""
|
|
||||||
class OneDrive(Cloud):
|
|
||||||
def __init__(self,conf):
|
|
||||||
Cloud.__init__(self)
|
|
||||||
def getName(self):
|
|
||||||
return 'one-drive'
|
|
||||||
def init(self,token):
|
|
||||||
self.refresh_token = token
|
|
||||||
self._refresh()
|
|
||||||
|
|
||||||
def _refresh(self,code=None):
|
|
||||||
url = "https://login.live.com/oauth20_token.srf"
|
|
||||||
#url="https://login.microsoftonline.com/common/oauth2/v2.0/token"
|
|
||||||
|
|
||||||
headers = {"Content-Type":"application/x-www-form-urlencoded"}
|
|
||||||
form = {"client_id":self.client_id,"client_secret":self.secret}
|
|
||||||
if code:
|
|
||||||
grant_type = 'authorization_code'
|
|
||||||
form['code'] = str(code)
|
|
||||||
else:
|
|
||||||
grant_type = 'refresh_token'
|
|
||||||
form['refresh_token'] = self.refresh_token
|
|
||||||
form['grant_type'] = grant_type
|
|
||||||
if self.redirect_uri:
|
|
||||||
form['redirect_uri'] = self.redirect_uri
|
|
||||||
r = requests.post(url,headers=headers,data=form)
|
|
||||||
r = json.loads(r.text)
|
|
||||||
if 'access_token' in r:
|
|
||||||
self.access_token = r['access_token']
|
|
||||||
self.refresh_token = r['refresh_token']
|
|
||||||
|
|
||||||
def upload(self,folder,mimetype,file):
|
|
||||||
"""
|
|
||||||
@param folder parent.id
|
|
||||||
@param name name of the file with extension
|
|
||||||
@param stream file content
|
|
||||||
|
|
||||||
"""
|
|
||||||
path = folder+"%2f"+file.filename
|
|
||||||
url = "https://apis.live.net/v5.0/me/skydrive/files/:name?access_token=:token".replace(":name",path).replace(":token",self.access_token) ;
|
|
||||||
|
|
||||||
header = {"Authorization": "Bearer "+self.access_token,"Content-Type":mimetype}
|
|
||||||
header['Content-Type']= mimetype
|
|
||||||
r = requests.put(url,header=header,files=file)
|
|
||||||
r = r.json()
|
|
||||||
return r
|
|
||||||
|
|
||||||
"""
|
|
||||||
This class uses dropbox version 2 API
|
|
||||||
"""
|
|
||||||
class Dropbox(Cloud):
|
|
||||||
def __init__(self):
|
|
||||||
Cloud.__init__(self)
|
|
||||||
def init(self,access_token):
|
|
||||||
self.access_token = access_token
|
|
||||||
def upload(self,folder,mimetype,file):
|
|
||||||
"""
|
|
||||||
@param folder parent.id
|
|
||||||
@param name name of the file with extension
|
|
||||||
@param stream file content
|
|
||||||
|
|
||||||
@TODO: This upload will only limit itself to 150 MB, it is possible to increase this size
|
|
||||||
"""
|
|
||||||
url = "https://content.dropboxapi.com/2/files/upload"
|
|
||||||
folder = folder if folder is not None else ""
|
|
||||||
path = "/"+folder+"/"+file.name.split('/')[-1]
|
|
||||||
path = path.replace("//","/")
|
|
||||||
header = {"Authorization":"Bearer "+self.access_token,"Content-Type":mimetype}
|
|
||||||
#header['autorename']= "false"
|
|
||||||
header['mode'] = "add"
|
|
||||||
#header['mute'] = "false"
|
|
||||||
header['Dropbox-API-Arg'] = json.dumps({"path":path})
|
|
||||||
r = requests.post(url,headers=header,data=file.read())
|
|
||||||
print r.text
|
|
||||||
r = r.json()
|
|
||||||
return r
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
This class implements basic interactions with box (cloud service providers)
|
|
||||||
Available functionalities are: authentication, file access,share and stream/download
|
|
||||||
"""
|
|
||||||
class Box(Cloud) :
|
|
||||||
def __init__(self,conf):
|
|
||||||
Cloud.__init__(self);
|
|
||||||
if conf is not None:
|
|
||||||
self.client_id = conf['client_id']
|
|
||||||
self.secret = conf['secret']
|
|
||||||
self.redirect_uri = conf['redirect_uri'] if 'redirect_uri' in conf else None
|
|
||||||
def init(self,token):
|
|
||||||
self.refresh_token = token
|
|
||||||
def set(self,code) :
|
|
||||||
self._access(code)
|
|
||||||
return 1 if self.access_token else 0
|
|
||||||
|
|
||||||
def _access(self,code):
|
|
||||||
body = {"client_id":self.client_id,"client_secret":self.secret,"grant_type":"authorization_code","code":code,"redirect_uri":self.redirect_uri}
|
|
||||||
headers = {"Content-Type":"application/x-www-form-urlencoded"}
|
|
||||||
url = "https://app.box.com/api/oauth2/token"
|
|
||||||
r = requests.post(url,headers=headers,data=body)
|
|
||||||
r = json.loads(r.text)
|
|
||||||
if 'error' not in r:
|
|
||||||
self.access_token = r['access_token']
|
|
||||||
self.refresh_token= r['refresh_token']
|
|
||||||
def _refresh(self,authToken) :
|
|
||||||
body = {"client_id":self.client_id,"client_secret":self.secret,"grant_type":"refresh_token"}
|
|
||||||
url = "https://app.box.com/api/oauth2/token";
|
|
||||||
headers = {"Content-Type":"application/x-www-form-urlencoded"}
|
|
||||||
r = requests.post(url,headers=headers,data=body)
|
|
||||||
r = json.loads(r.text)
|
|
||||||
if 'error' not in r :
|
|
||||||
self.access_token = r['access_token']
|
|
||||||
def get_user(self):
|
|
||||||
url = "https://api.box.com/2.0/users/me"
|
|
||||||
headers = {"Authorization":"Bearer "+self.access_token}
|
|
||||||
r = requests.get(url,headers=headers)
|
|
||||||
r = json.loads(r.text)
|
|
||||||
if 'login' in r :
|
|
||||||
#BYTES_TO_GB = 1000000000
|
|
||||||
user = {"uii":r['name'],"uid":r['login']}
|
|
||||||
usage = {"size":r['space_amount']/Cloud.BYTES_TO_GB,"used":r['space_used']/Cloud.BYTES_TO_GB,"units":"GB"}
|
|
||||||
user['usage'] = usage
|
|
||||||
return user
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def format(self,item) :
|
|
||||||
file = {"name":item['name'],"origin":"box","id":item['id'],"url":""}
|
|
||||||
meta = {"last_modified":item['content_modified_at']}
|
|
||||||
return file
|
|
||||||
def get_files(self,ext,url=None):
|
|
||||||
ext = " ".join(ext)
|
|
||||||
url = "https://api.box.com/2.0/search?query=:filter&type=file"
|
|
||||||
url = url.replace(":filter",ext)
|
|
||||||
headers = {"Authorization":"Bearer "+self.access_token}
|
|
||||||
|
|
||||||
r = requests.get(url,headers=headers) ;
|
|
||||||
r = json.loads(r.text)
|
|
||||||
if 'entries' in r:
|
|
||||||
#self.files = [ self.format(file) for file in r['entries'] if file['type'] == 'file' and 'id' in file]
|
|
||||||
for item in r :
|
|
||||||
if item['type'] == 'file' and 'id' in item :
|
|
||||||
self.files.append( self.format(item))
|
|
||||||
else:
|
|
||||||
#
|
|
||||||
# We are dealing with a folder, this is necessary uploads
|
|
||||||
#
|
|
||||||
self.folder[item['name']] = item["id"]
|
|
||||||
|
|
||||||
return self.files
|
|
||||||
def stream(self,url):
|
|
||||||
headers = {"Authorization":"Bearer "+self.access_token}
|
|
||||||
r = requests.get(url,headers=headers,stream=True)
|
|
||||||
yield r.content
|
|
||||||
def share(self,id):
|
|
||||||
url = "https://api.box.com/2.0/files/:id".replace(":id",id);
|
|
||||||
headers = {"Authorization":"Bearer "+self.access_token,"Content-Type":"application/json"}
|
|
||||||
body = {"shared_link":{"access":"open","permissions":{"can_download":True}}}
|
|
||||||
r = requests.put(url,headers=headers,data=json.dumps(body))
|
|
||||||
r = json.loads(r.text)
|
|
||||||
if 'shared_link' in r:
|
|
||||||
return r['shared_link']['download_url']
|
|
||||||
|
|
||||||
return None
|
|
||||||
def upload(self,folder,mimetype,file):
|
|
||||||
"""
|
|
||||||
@param folder parent.id
|
|
||||||
@param name name of the file with extension
|
|
||||||
@param stream file content
|
|
||||||
"""
|
|
||||||
if folder not in self.folders :
|
|
||||||
#
|
|
||||||
# Let us create the folder now
|
|
||||||
#
|
|
||||||
url = "https://api.box.com/2.0/folders"
|
|
||||||
header = {"Authorization":"Bearer "+self.access_token}
|
|
||||||
pid = self.folders["/"] if "/" in self.folders else self.folders[""]
|
|
||||||
data = {"parent":{"id":str(pid)}}
|
|
||||||
|
|
||||||
r = requests.post(url,header=header,data=data)
|
|
||||||
r = r.json()
|
|
||||||
pid = r["id"]
|
|
||||||
else:
|
|
||||||
pid = self.folders[folder]
|
|
||||||
url = "https://upload.box.com/api/2.0/files/content"
|
|
||||||
header = {"Authorization Bearer ":self.access_token,"Content-Type":mimetype}
|
|
||||||
r = requests.post(url,header=header,file=file)
|
|
||||||
r = r.json()
|
|
||||||
return r
|
|
||||||
|
|
||||||
|
|
||||||
class SugarSync(Cloud):
    def __init__(self,conf=None):
        Cloud.__init__(self)
        if conf is not None:
            self.app_id      = conf['app_id']
            self.private_key = conf['private_key']
            self.access_key  = conf['access_key']
        #self.access_token = None
        #self.refresh_token= None
        # self.redirect_uri = conf['redirect_uri'] if 'redirect_uri' in conf else None
        #self.files = []
    def init(self,token):
        self.refresh_token = token
        self._refresh()
    def login(self,email,password):
        xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><appAuthorization><username>:username</username><password>:password</password><application>:app_id</application><accessKeyId>:accesskey</accessKeyId><privateAccessKey>:privatekey</privateAccessKey></appAuthorization>'
        xml = xml.replace(":app_id",self.app_id).replace(":privatekey",self.private_key).replace(":accesskey",self.access_key).replace(":username",email).replace(":password",password)
        headers = {"Content-Type":"application/xml","User-Agent":"The Phi Technology"}
        #
        # @NOTE: the original code never set the url; the app-authorization resource is assumed here
        url = "https://api.sugarsync.com/app-authorization"
        r = requests.post(url,headers=headers,data=xml)
        self.refresh_token = r.headers['Location']


    def _refresh(self):
        xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><tokenAuthRequest><accessKeyId>:accesskey</accessKeyId><privateAccessKey>:privatekey</privateAccessKey><refreshToken>:authtoken</refreshToken></tokenAuthRequest>'
        xml = xml.replace(":accesskey",self.access_key).replace(":privatekey",self.private_key).replace(":authtoken",self.refresh_token)

        headers = {"Content-Type":"application/xml","User-Agent":"The Phi Technology LLC"}
        url = "https://api.sugarsync.com/authorization"
        r = requests.post(url,data=xml,headers=headers)

        self.access_token = r.headers['Location']
    def format(self,item):
        file = {}
        file['name'] = item['displayName']
        file['url']  = item['fileData']
        file['id']   = item['ref']
        meta = {}
        meta['last_modified'] = item['lastModified']
        file['meta'] = meta
        return file
    def get_files(self,ext,url=None) :
        if url is None:
            url = "https://api.sugarsync.com/folder/:sc:3989243:2/contents"
        headers = {"Authorization":self.access_token,"User-Agent":"The Phi Technology LLC","Content-Type":"application/xml;charset=utf-8"}
        r = requests.get(url,headers=headers)
        stream = r.text #.encode('utf-8')
        r = xmltodict.parse(r.text)

        if 'collectionContents' in r:

            r = r['collectionContents']
            #
            # Extracting files in the current folder, then we will see if there are any subfolders.
            # The parser has weird behaviors that leave inconsistent objects (field names),
            # which means we have to filter them out by testing the item being processed.
            if 'file' in r:
                if isinstance(r['file'],dict):
                    self.files += [ self.format(r['file']) ]
                else:

                    #self.files += [self.format(item) for item in r['file'] if isinstance(item,(str, unicode)) == False and item['displayName'].endswith(ext)]
                    self.files += [self.format(item) for item in r['file'] if isinstance(item,(str, unicode)) == False and self.match(item['displayName'],ext)]

            if 'collection' in r:
                if isinstance(r['collection'],dict) :
                    #
                    # For some unusual reason the parser handles single instances as objects instead of collections
                    # @NOTE: This is a behavior that happens when a single item is in the collection
                    #
                    self.get_files(ext,r['collection']['contents'])
                else:
                    for item in r['collection'] :
                        if isinstance(item,(str, unicode)) == False and 'contents' in item:
                            self.get_files(ext,item['contents'])
                    #[ self.get_files(ext,item['contents']) for item in r['collection'] if item['type'] == 'folder']
        return self.files
    def get_user(self):
        url = "https://api.sugarsync.com/user"
        headers = {"Authorization":self.access_token,"User-Agent":"The Phi Technology LLC","Content-Type":"application/xml;charset=utf-8"}
        r = requests.get(url,headers=headers)

        r = xmltodict.parse(r.text)
        r = r['user']

        if 'username' in r and 'quota' in r:
            user = {"uid":r['username'],"uii":r['nickname']}
            size = long(r['quota']['limit'])
            used = long(r['quota']['usage'])
            usage = {"size":size/Cloud.BYTES_TO_GB,"used":used/Cloud.BYTES_TO_GB,"units":"GB"}
            user['usage'] = usage
            return user
        else:
            return None
    def stream(self,url):
        headers = {"Authorization":self.access_token}
        r = requests.get(url,headers=headers,stream=True)
        yield r.content
"""
|
|
||||||
This function will create a public link and share it to designated parties
|
|
||||||
"""
|
|
||||||
def share(self,id):
|
|
||||||
url = "https://api.sugarsync.com/file/:id".replace(":id",id);
|
|
||||||
xml = '<?xml version="1.0" encoding="UTF-8" ?><file><publicLink enabled="true"/></file>';
|
|
||||||
headers = {"Content-Type":"application/xml","Authorization":self.access_token,"User-Agent":"The Phi Technology LLC"}
|
|
||||||
r = requests.put(url,header=header,data=xml)
|
|
||||||
r = xmltodict.parse(r.text)
|
|
||||||
if 'file' in r:
|
|
||||||
return r['file']['publicLink']['content']+"?directDownload=true"
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
    def upload(self,folder,mimetype,file):

        name = folder+"/"+file.filename
        xml = '<?xml version="1.0" encoding="UTF-8" ?><file><displayName>:name</displayName><mediaType>:type</mediaType></file>'
        xml = xml.replace(':name',name).replace(':type',mimetype)
        header = {"content-type":"application/xml","User-Agent":"The Phi Technology LLC"}
        header['Authorization'] = self.access_token
        #
        # @NOTE: the original code never set the target url; the parent folder's
        # contents resource is assumed here, with `folder` carrying the folder id.
        #
        url = "https://api.sugarsync.com/folder/:id/contents".replace(":id",folder)

        r = requests.post(url,headers=header,files=file,data=xml)
        pass
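#
# A minimal usage sketch (hedged): the credential values and `refresh_token` are
# placeholders issued by SugarSync's developer console; the sequence mirrors the
# methods above but has not been validated against a live account.
#
# sugar = SugarSync({'app_id':'...','private_key':'...','access_key':'...'})
# sugar.init(refresh_token)               #-- exchanges the refresh token for an access token
# songs = sugar.get_files('.mp3')         #-- recursively lists matching files
# link  = sugar.share(songs[0]['id'])     #-- public link with ?directDownload=true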
class iTunes(Cloud):
    def __init__(self):
        Cloud.__init__(self)
        self.url_topsongs = "http://ax.itunes.apple.com/WebObjects/MZStoreServices.woa/ws/RSS/topsongs/limit=:limit/explicit=false/json"
        self.url_search = "http://itunes.apple.com/search?term=:keyword&limit=:limit&media=music"
    def parse_search(self,obj):

        files = []
        try:
            logs = obj['results']

            for item in logs :

                file = {}
                file['id']   = item['trackId']
                file['name'] = item['trackName']
                file['id3'] = {}
                file['id3']['track'] = item['trackName']
                file['id3']['title'] = item['trackName']
                file['id3']['artist']= item['artistName']
                file['id3']['album'] = item['collectionName']
                file['id3']['genre'] = item['primaryGenreName']
                file['id3']['poster']= item['artworkUrl100']
                file['url'] = item['previewUrl']

                files.append(file)
        except Exception,e:
            print e
        return files
    def parse_chart(self,obj):
        """
            This function will parse the topsongs returned by the itunes API
        """
        files = []

        try:
            logs = obj['feed']['entry']
            if isinstance(logs,dict) :
                logs = [logs]

            for item in logs :

                file = {'name':item['im:name']['label'],'id3':{}}
                file['id'] = item['id']['attributes']['im:id']
                file['id3'] = {}
                file['id3']['artist'] = item['im:artist']['label']
                file['id3']['track'] = item['title']['label']
                file['id3']['title'] = item['title']['label']
                file['id3']['album'] = item['im:collection']['im:name']['label']
                file['id3']['genre'] = item['category']['attributes']['term']
                index = len(item['im:image'])-1
                file['id3']['poster'] = item['im:image'][index]['label']
                url = [link['attributes']['href'] for link in item['link'] if 'im:assetType' in link['attributes'] and link['attributes']['im:assetType']=='preview']

                if len(url) > 0:
                    url = url[0]
                    file['url'] = url #item['link'][1]['attributes']['href'] //'im:assetType' == 'preview' and 'im:duration' is in the sub-item
                    files.append(file)

                else:
                    continue
        except Exception,e:
            print e
            #
            # @TODO: Log the error somewhere to make it useful

        return files
    def parse(self,obj) :
        if 'feed' in obj and 'entry' in obj['feed']:
            return self.parse_chart(obj)
        elif 'results' in obj :
            return self.parse_search(obj)
        else:
            return []
    def get_files(self,keyword=None,limit="1") :
        url = self.url_search if keyword is not None else self.url_topsongs
        keyword = "" if keyword is None else keyword
        # limit = "50" if keyword == "" else "1"

        url = url.replace(":keyword",keyword.replace(' ','+')).replace(':limit',limit)
        r = requests.get(url)
        r = r.json()
        return self.parse(r)
@ -1,709 +0,0 @@
"""
|
|
||||||
This file implements data transport stuctures in order to allow data to be moved to and from anywhere
|
|
||||||
We can thus read data from disk and write to the cloud,queue, or couchdb or SQL
|
|
||||||
"""
|
|
||||||
from flask import request, session
|
|
||||||
import os
|
|
||||||
import pika
|
|
||||||
import json
|
|
||||||
import numpy as np
|
|
||||||
from couchdbkit import Server
|
|
||||||
import re
|
|
||||||
from csv import reader
|
|
||||||
from datetime import datetime
|
|
||||||
"""
|
|
||||||
@TODO: Write a process by which the class automatically handles reading and creating a preliminary sample and discovers the meta data
|
|
||||||
"""
|
|
||||||
class Reader:
    def __init__(self):
        self.nrows = 0
        self.xchar = None

    def row_count(self):
        content = self.read()
        return np.sum([1 for row in content])
"""
|
|
||||||
This function determines the most common delimiter from a subset of possible delimiters. It uses a statistical approach to guage the distribution of columns for a given delimiter
|
|
||||||
"""
|
|
||||||
def delimiter(self,sample):
|
|
||||||
|
|
||||||
m = {',':[],'\t':[],'|':[],'\x3A':[]}
|
|
||||||
delim = m.keys()
|
|
||||||
for row in sample:
|
|
||||||
for xchar in delim:
|
|
||||||
if row.split(xchar) > 1:
|
|
||||||
m[xchar].append(len(row.split(xchar)))
|
|
||||||
else:
|
|
||||||
m[xchar].append(0)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#
|
|
||||||
# The delimiter with the smallest variance, provided the mean is greater than 1
|
|
||||||
# This would be troublesome if there many broken records sampled
|
|
||||||
#
|
|
||||||
m = {id: np.var(m[id]) for id in m.keys() if m[id] != [] and int(np.mean(m[id]))>1}
|
|
||||||
index = m.values().index( min(m.values()))
|
|
||||||
xchar = m.keys()[index]
|
|
||||||
|
|
||||||
return xchar
|
|
||||||
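    #
    # A small worked example (hedged): for the three sample rows below, splitting on ','
    # yields column counts [3,3,3] (variance 0), while '|', '\t' and ':' yield [1,1,1]
    # and are filtered out by the mean > 1 requirement, so ',' is returned.
    #
    # sample = ["a,b,c","1,2,3","4,5,6"]
    # Reader().delimiter(sample)    #-- ','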
"""
|
|
||||||
This function determines the number of columns of a given sample
|
|
||||||
@pre self.xchar is not None
|
|
||||||
"""
|
|
||||||
def col_count(self,sample):
|
|
||||||
|
|
||||||
m = {}
|
|
||||||
i = 0
|
|
||||||
|
|
||||||
for row in sample:
|
|
||||||
row = self.format(row)
|
|
||||||
id = str(len(row))
|
|
||||||
#id = str(len(row.split(self.xchar)))
|
|
||||||
|
|
||||||
if id not in m:
|
|
||||||
m[id] = 0
|
|
||||||
m[id] = m[id] + 1
|
|
||||||
|
|
||||||
index = m.values().index( max(m.values()) )
|
|
||||||
ncols = int(m.keys()[index])
|
|
||||||
|
|
||||||
|
|
||||||
return ncols;
|
|
||||||
"""
|
|
||||||
This function will clean records of a given row by removing non-ascii characters
|
|
||||||
@pre self.xchar is not None
|
|
||||||
"""
|
|
||||||
def format (self,row):
|
|
||||||
|
|
||||||
if isinstance(row,list) == False:
|
|
||||||
#
|
|
||||||
# We've observed sometimes fields contain delimiter as a legitimate character, we need to be able to account for this and not tamper with the field values (unless necessary)
|
|
||||||
cols = self.split(row)
|
|
||||||
#cols = row.split(self.xchar)
|
|
||||||
else:
|
|
||||||
cols = row ;
|
|
||||||
return [ re.sub('[^\x00-\x7F,\n,\r,\v,\b,]',' ',col.strip()).strip().replace('"','') for col in cols]
|
|
||||||
|
|
||||||
#if isinstance(row,list) == False:
|
|
||||||
# return (self.xchar.join(r)).format('utf-8')
|
|
||||||
#else:
|
|
||||||
# return r
|
|
||||||
"""
|
|
||||||
This function performs a split of a record and tries to attempt to preserve the integrity of the data within i.e accounting for the double quotes.
|
|
||||||
@pre : self.xchar is not None
|
|
||||||
"""
|
|
||||||
def split (self,row):
|
|
||||||
|
|
||||||
pattern = "".join(["(?:^|",self.xchar,")(\"(?:[^\"]+|\"\")*\"|[^",self.xchar,"]*)"])
|
|
||||||
return re.findall(pattern,row.replace('\n',''))
|
|
||||||
|
|
||||||
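    #
    # A small worked example (hedged), assuming self.xchar has been set to ',':
    # the quoted field keeps its embedded comma instead of being split on it.
    #
    # r = Reader(); r.xchar = ','
    # r.split('a,"b,c",d')    #-- ['a', '"b,c"', 'd']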
class Writer:

    def format(self,row,xchar):
        if xchar is not None and isinstance(row,list):
            return xchar.join(row)+'\n'
        elif xchar is None and isinstance(row,dict):
            row = json.dumps(row)
        return row
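    #
    # A short example (hedged) of the two formatting paths above:
    #
    # w = Writer()
    # w.format(['a','b','c'],',')    #-- 'a,b,c\n'  (delimited record)
    # w.format({'x':1},None)         #-- '{"x": 1}' (JSON record)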
"""
|
|
||||||
It is important to be able to archive data so as to insure that growth is controlled
|
|
||||||
Nothing in nature grows indefinitely neither should data being handled.
|
|
||||||
"""
|
|
||||||
def archive(self):
|
|
||||||
pass
|
|
||||||
def flush(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
"""
|
|
||||||
This class is designed to read data from an Http request file handler provided to us by flask
|
|
||||||
The file will be heald in memory and processed accordingly
|
|
||||||
NOTE: This is inefficient and can crash a micro-instance (becareful)
|
|
||||||
"""
|
|
||||||
class HttpRequestReader(Reader):
    def __init__(self,**params):
        self.file_length = 0
        try:

            #self.file = params['file']
            #self.file.seek(0, os.SEEK_END)
            #self.file_length = self.file.tell()

            #print 'size of file ',self.file_length
            self.content = params['file'].readlines()
            self.file_length = len(self.content)
        except Exception, e:
            print "Error ... ",e
            pass

    def isready(self):
        return self.file_length > 0
    def read(self,size =-1):
        i = 1
        for row in self.content:
            i += 1
            if size == i:
                break
            yield row
"""
|
|
||||||
This class is designed to write data to a session/cookie
|
|
||||||
"""
|
|
||||||
class HttpSessionWriter(Writer):
|
|
||||||
"""
|
|
||||||
@param key required session key
|
|
||||||
"""
|
|
||||||
def __init__(self,**params):
|
|
||||||
self.session = params['queue']
|
|
||||||
self.session['sql'] = []
|
|
||||||
self.session['csv'] = []
|
|
||||||
self.tablename = re.sub('..+$','',params['filename'])
|
|
||||||
self.session['uid'] = params['uid']
|
|
||||||
#self.xchar = params['xchar']
|
|
||||||
|
|
||||||
|
|
||||||
    def format_sql(self,row):
        values = "','".join([col.replace('"','').replace("'",'') for col in row])
        return "".join(["INSERT INTO :table VALUES('",values,"');\n"]).replace(':table',self.tablename)
    def isready(self):
        return True
    def write(self,**params):
        label = params['label']
        row = params ['row']

        if label == 'usable':
            self.session['csv'].append(self.format(row,','))
            self.session['sql'].append(self.format_sql(row))
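    #
    # A short example (hedged) of the SQL rendering above, assuming the writer was
    # created with filename 'logs.csv' (so tablename is 'logs'):
    #
    # self.format_sql(['a','b','c'])    #-- "INSERT INTO logs VALUES('a','b','c');\n"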
"""
|
|
||||||
This class is designed to read data from disk (location on hard drive)
|
|
||||||
@pre : isready() == True
|
|
||||||
"""
|
|
||||||
class DiskReader(Reader) :
|
|
||||||
"""
|
|
||||||
@param path absolute path of the file to be read
|
|
||||||
"""
|
|
||||||
def __init__(self,**params):
|
|
||||||
Reader.__init__(self)
|
|
||||||
self.path = params['path'] ;
|
|
||||||
|
|
||||||
def isready(self):
|
|
||||||
return os.path.exists(self.path)
|
|
||||||
"""
|
|
||||||
This function reads the rows from a designated location on disk
|
|
||||||
@param size number of rows to be read, -1 suggests all rows
|
|
||||||
"""
|
|
||||||
def read(self,size=-1):
|
|
||||||
f = open(self.path,'rU')
|
|
||||||
i = 1
|
|
||||||
for row in f:
|
|
||||||
|
|
||||||
i += 1
|
|
||||||
if size == i:
|
|
||||||
break
|
|
||||||
yield row
|
|
||||||
f.close()
|
|
||||||
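#
# A minimal usage sketch (hedged); '/tmp/sample.csv' is a placeholder path:
#
# reader = DiskReader(path='/tmp/sample.csv')
# if reader.isready():
#     for row in reader.read(size=10):    #-- first rows only
#         print row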
"""
|
|
||||||
This function writes output to disk in a designated location
|
|
||||||
"""
|
|
||||||
class DiskWriter(Writer):
|
|
||||||
def __init__(self,**params):
|
|
||||||
if 'path' in params:
|
|
||||||
self.path = params['path']
|
|
||||||
else:
|
|
||||||
self.path = None
|
|
||||||
if 'name' in params:
|
|
||||||
self.name = params['name'];
|
|
||||||
else:
|
|
||||||
self.name = None
|
|
||||||
if os.path.exists(self.path) == False:
|
|
||||||
os.mkdir(self.path)
|
|
||||||
"""
|
|
||||||
This function determines if the class is ready for execution or not
|
|
||||||
i.e it determines if the preconditions of met prior execution
|
|
||||||
"""
|
|
||||||
def isready(self):
|
|
||||||
|
|
||||||
p = self.path is not None and os.path.exists(self.path)
|
|
||||||
q = self.name is not None
|
|
||||||
return p and q
|
|
||||||
"""
|
|
||||||
This function writes a record to a designated file
|
|
||||||
@param label <passed|broken|fixed|stats>
|
|
||||||
@param row row to be written
|
|
||||||
"""
|
|
||||||
def write(self,**params):
|
|
||||||
label = params['label']
|
|
||||||
row = params['row']
|
|
||||||
xchar = None
|
|
||||||
if 'xchar' is not None:
|
|
||||||
xchar = params['xchar']
|
|
||||||
path = ''.join([self.path,os.sep,label])
|
|
||||||
if os.path.exists(path) == False:
|
|
||||||
os.mkdir(path) ;
|
|
||||||
path = ''.join([path,os.sep,self.name])
|
|
||||||
f = open(path,'a')
|
|
||||||
row = self.format(row,xchar);
|
|
||||||
f.write(row)
|
|
||||||
f.close()
|
|
||||||
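#
# A minimal usage sketch (hedged); the path and file name are placeholders:
#
# writer = DiskWriter(path='/tmp/out',name='data.csv')
# if writer.isready():
#     writer.write(label='passed',row=['a','b','c'],xchar=',')   #-- appends to /tmp/out/passed/data.csv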
"""
|
|
||||||
This class hierarchy is designed to handle interactions with a queue server using pika framework (our tests are based on rabbitmq)
|
|
||||||
"""
|
|
||||||
class MessageQueue:
|
|
||||||
def __init__(self,**params):
|
|
||||||
self.host= params['host']
|
|
||||||
self.uid = params['uid']
|
|
||||||
self.qid = params['qid']
|
|
||||||
|
|
||||||
def isready(self):
|
|
||||||
#self.init()
|
|
||||||
resp = self.connection is not None and self.connection.is_open
|
|
||||||
self.close()
|
|
||||||
return resp
|
|
||||||
def close(self):
|
|
||||||
if self.connection.is_closed == False :
|
|
||||||
self.channel.close()
|
|
||||||
self.connection.close()
|
|
||||||
"""
|
|
||||||
This class is designed to publish content to an AMQP (Rabbitmq)
|
|
||||||
The class will rely on pika to implement this functionality
|
|
||||||
|
|
||||||
We will publish information to a given queue for a given exchange
|
|
||||||
"""
|
|
||||||
|
|
||||||
class QueueWriter(MessageQueue,Writer):
|
|
||||||
def __init__(self,**params):
|
|
||||||
#self.host= params['host']
|
|
||||||
#self.uid = params['uid']
|
|
||||||
#self.qid = params['queue']
|
|
||||||
MessageQueue.__init__(self,**params);
|
|
||||||
|
|
||||||
|
|
||||||
def init(self,label=None):
|
|
||||||
properties = pika.ConnectionParameters(host=self.host)
|
|
||||||
self.connection = pika.BlockingConnection(properties)
|
|
||||||
self.channel = self.connection.channel()
|
|
||||||
self.info = self.channel.exchange_declare(exchange=self.uid,type='direct',durable=True)
|
|
||||||
if label is None:
|
|
||||||
self.qhandler = self.channel.queue_declare(queue=self.qid,durable=True)
|
|
||||||
else:
|
|
||||||
self.qhandler = self.channel.queue_declare(queue=label,durable=True)
|
|
||||||
|
|
||||||
self.channel.queue_bind(exchange=self.uid,queue=self.qhandler.method.queue)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
This function writes a stream of data to the a given queue
|
|
||||||
@param object object to be written (will be converted to JSON)
|
|
||||||
@TODO: make this less chatty
|
|
||||||
"""
|
|
||||||
    def write(self,**params):
        xchar = None
        if 'xchar' in params:
            xchar = params['xchar']
        object = self.format(params['row'],xchar)

        label = params['label']
        self.init(label)
        _mode = 2
        if isinstance(object,str):
            stream = object
            _type = 'text/plain'
        else:
            stream = json.dumps(object)
            if 'type' in params :
                _type = params['type']
            else:
                _type = 'application/json'

        self.channel.basic_publish(
            exchange=self.uid,
            routing_key=label,
            body=stream,
            properties=pika.BasicProperties(content_type=_type,delivery_mode=_mode)
        )
        self.close()

    def flush(self,label):
        self.init(label)
        _mode = 1 #-- Non persistent
        self.channel.queue_delete( queue=label)
        self.close()
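#
# A minimal usage sketch (hedged); host/uid/qid values are placeholders and a
# rabbitmq server is assumed to be reachable:
#
# q = QueueWriter(host='localhost',uid='demo.exchange',qid='repair')
# q.write(label='repair',row={'status':'ok'})   #-- published as application/json
# q.flush('repair')                             #-- deletes the queue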
"""
|
|
||||||
This class will read from a queue provided an exchange, queue and host
|
|
||||||
@TODO: Account for security and virtualhosts
|
|
||||||
"""
|
|
||||||
class QueueReader(MessageQueue,Reader):
|
|
||||||
"""
|
|
||||||
@param host host
|
|
||||||
@param uid exchange identifier
|
|
||||||
@param qid queue identifier
|
|
||||||
"""
|
|
||||||
def __init__(self,**params):
|
|
||||||
#self.host= params['host']
|
|
||||||
#self.uid = params['uid']
|
|
||||||
#self.qid = params['qid']
|
|
||||||
MessageQueue.__init__(self,**params);
|
|
||||||
if 'durable' in params :
|
|
||||||
self.durable = True
|
|
||||||
else:
|
|
||||||
self.durable = False
|
|
||||||
self.size = -1
|
|
||||||
self.data = {}
|
|
||||||
    def init(self,qid):

        properties = pika.ConnectionParameters(host=self.host)
        self.connection = pika.BlockingConnection(properties)
        self.channel = self.connection.channel()
        self.channel.exchange_declare(exchange=self.uid,type='direct',durable=True)

        self.info = self.channel.queue_declare(queue=qid,durable=True)
"""
|
|
||||||
This is the callback function designed to process the data stream from the queue
|
|
||||||
|
|
||||||
"""
|
|
||||||
def callback(self,channel,method,header,stream):
|
|
||||||
|
|
||||||
r = []
|
|
||||||
if re.match("^\{|\[",stream) is not None:
|
|
||||||
r = json.loads(stream)
|
|
||||||
else:
|
|
||||||
|
|
||||||
r = stream
|
|
||||||
|
|
||||||
qid = self.info.method.queue
|
|
||||||
if qid not in self.data :
|
|
||||||
self.data[qid] = []
|
|
||||||
|
|
||||||
self.data[qid].append(r)
|
|
||||||
#
|
|
||||||
# We stop reading when the all the messages of the queue are staked
|
|
||||||
#
|
|
||||||
if self.size == len(self.data[qid]) or len(self.data[qid]) == self.info.method.message_count:
|
|
||||||
self.close()
|
|
||||||
|
|
||||||
"""
|
|
||||||
This function will read, the first message from a queue
|
|
||||||
@TODO:
|
|
||||||
Implement channel.basic_get in order to retrieve a single message at a time
|
|
||||||
Have the number of messages retrieved be specified by size (parameter)
|
|
||||||
"""
|
|
||||||
def read(self,size=-1):
|
|
||||||
r = {}
|
|
||||||
self.size = size
|
|
||||||
#
|
|
||||||
# We enabled the reader to be able to read from several queues (sequentially for now)
|
|
||||||
# The qid parameter will be an array of queues the reader will be reading from
|
|
||||||
#
|
|
||||||
if isinstance(self.qid,basestring) :
|
|
||||||
self.qid = [self.qid]
|
|
||||||
for qid in self.qid:
|
|
||||||
self.init(qid)
|
|
||||||
# r[qid] = []
|
|
||||||
|
|
||||||
if self.info.method.message_count > 0:
|
|
||||||
|
|
||||||
self.channel.basic_consume(self.callback,queue=qid,no_ack=False);
|
|
||||||
self.channel.start_consuming()
|
|
||||||
else:
|
|
||||||
|
|
||||||
pass
|
|
||||||
#self.close()
|
|
||||||
# r[qid].append( self.data)
|
|
||||||
|
|
||||||
return self.data
|
|
||||||
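#
# A minimal usage sketch (hedged); values are placeholders and messages must already
# be sitting in the queue for read() to return anything:
#
# reader = QueueReader(host='localhost',uid='demo.exchange',qid='repair')
# data = reader.read(size=-1)    #-- {'repair': [message, ...]}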
class QueueListener(QueueReader):
    def init(self,qid):
        properties = pika.ConnectionParameters(host=self.host)
        self.connection = pika.BlockingConnection(properties)
        self.channel = self.connection.channel()
        self.channel.exchange_declare(exchange=self.uid,type='direct',durable=True )

        self.info = self.channel.queue_declare(passive=True,exclusive=True,queue=qid)

        self.channel.queue_bind(exchange=self.uid,queue=self.info.method.queue,routing_key=qid)
        #self.callback = callback
    def read(self):

        self.init(self.qid)
        self.channel.basic_consume(self.callback,queue=self.qid,no_ack=True)
        self.channel.start_consuming()
"""
|
|
||||||
This class is designed to write output as sql insert statements
|
|
||||||
The class will inherit from DiskWriter with minor adjustments
|
|
||||||
@TODO: Include script to create the table if need be using the upper bound of a learner
|
|
||||||
"""
|
|
||||||
class SQLDiskWriter(DiskWriter):
|
|
||||||
def __init__(self,**args):
|
|
||||||
DiskWriter.__init__(self,**args)
|
|
||||||
self.tablename = re.sub('\..+$','',self.name).replace(' ','_')
|
|
||||||
"""
|
|
||||||
@param label
|
|
||||||
@param row
|
|
||||||
@param xchar
|
|
||||||
"""
|
|
||||||
def write(self,**args):
|
|
||||||
label = args['label']
|
|
||||||
row = args['row']
|
|
||||||
|
|
||||||
if label == 'usable':
|
|
||||||
values = "','".join([col.replace('"','').replace("'",'') for col in row])
|
|
||||||
row = "".join(["INSERT INTO :table VALUES('",values,"');\n"]).replace(':table',self.tablename)
|
|
||||||
|
|
||||||
args['row'] = row
|
|
||||||
DiskWriter.write(self,**args)
|
|
||||||
class Couchdb:
    """
        @param uri    host & port reference
        @param uid    user id involved

        @param dbname database name (target)
    """
    def __init__(self,**args):
        uri = args['uri']
        self.uid = args['uid']
        dbname = args['dbname']
        self.server = Server(uri=uri)
        self.dbase = self.server.get_db(dbname)
        if self.dbase.doc_exist(self.uid) == False:
            self.dbase.save_doc({"_id":self.uid})
"""
|
|
||||||
Insuring the preconditions are met for processing
|
|
||||||
"""
|
|
||||||
def isready(self):
|
|
||||||
p = self.server.info() != {}
|
|
||||||
if p == False or self.dbase.dbname not in self.server.all_dbs():
|
|
||||||
return False
|
|
||||||
#
|
|
||||||
# At this point we are sure that the server is connected
|
|
||||||
# We are also sure that the database actually exists
|
|
||||||
#
|
|
||||||
q = self.dbase.doc_exist(self.uid)
|
|
||||||
if q == False:
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
    def view(self,id,**args):
        r = self.dbase.view(id,**args)
        r = r.all()
        return r[0]['value'] if len(r) > 0 else []

"""
|
|
||||||
This function will read an attachment from couchdb and return it to calling code. The attachment must have been placed before hand (otherwise oops)
|
|
||||||
@T: Account for security & access control
|
|
||||||
"""
|
|
||||||
class CouchdbReader(Couchdb,Reader):
|
|
||||||
"""
|
|
||||||
@param filename filename (attachment)
|
|
||||||
"""
|
|
||||||
def __init__(self,**args):
|
|
||||||
#
|
|
||||||
# setting the basic parameters for
|
|
||||||
Couchdb.__init__(self,**args)
|
|
||||||
if 'filename' in args :
|
|
||||||
self.filename = args['filename']
|
|
||||||
else:
|
|
||||||
self.filename = None
|
|
||||||
|
|
||||||
    def isready(self):
        #
        # Is the basic information about the database valid
        #
        p = Couchdb.isready(self)

        if p == False:
            return False
        #
        # The database name is set and correct at this point
        # We ensure the document of the given user has the requested attachment.
        #

        doc = self.dbase.get(self.uid)

        if '_attachments' in doc:
            r = self.filename in doc['_attachments'].keys()

        else:
            r = False

        return r
    def stream(self,size=-1):
        content = self.dbase.fetch_attachment(self.uid,self.filename).split('\n')
        i = 1
        for row in content:
            yield row
            if size > 0 and i == size:
                break
            i = i + 1

    def read(self,size=-1):
        if self.filename is not None:
            return self.stream(size)
        else:
            return self.basic_read()
    def basic_read(self):
        document = self.dbase.get(self.uid)
        del document['_id'], document['_rev']
        return document
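#
# A minimal usage sketch (hedged); connection values are placeholders:
#
# reader = CouchdbReader(uri='http://localhost:5984',uid='demo@the-phi.com',dbname='monitor',filename='data.csv')
# if reader.isready():
#     for row in reader.read():    #-- streams the attachment line by line
#         print row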
"""
|
|
||||||
This class will write on a couchdb document provided a scope
|
|
||||||
The scope is the attribute that will be on the couchdb document
|
|
||||||
"""
|
|
||||||
class CouchdbWriter(Couchdb,Writer):
|
|
||||||
"""
|
|
||||||
@param uri host & port reference
|
|
||||||
@param uid user id involved
|
|
||||||
@param filename filename (attachment)
|
|
||||||
@param dbname database name (target)
|
|
||||||
"""
|
|
||||||
def __init__(self,**args):
|
|
||||||
|
|
||||||
Couchdb.__init__(self,**args)
|
|
||||||
uri = args['uri']
|
|
||||||
self.uid = args['uid']
|
|
||||||
if 'filename' in args:
|
|
||||||
self.filename = args['filename']
|
|
||||||
else:
|
|
||||||
self.filename = None
|
|
||||||
dbname = args['dbname']
|
|
||||||
self.server = Server(uri=uri)
|
|
||||||
self.dbase = self.server.get_db(dbname)
|
|
||||||
#
|
|
||||||
# If the document doesn't exist then we should create it
|
|
||||||
#
|
|
||||||
|
|
||||||
"""
|
|
||||||
write a given attribute to a document database
|
|
||||||
@param label scope of the row repair|broken|fixed|stats
|
|
||||||
@param row row to be written
|
|
||||||
"""
|
|
||||||
    def write(self,**params):

        document = self.dbase.get(self.uid)
        label = params['label']

        if 'row' in params :
            row = params['row']
            row_is_list = isinstance(row,list)
            if label not in document :
                document[label] = row if row_is_list else [row]
            elif isinstance(document[label][0],list) :
                document[label] += row

            else:
                document[label].append(row)
        else :
            if label not in document :
                document[label] = {}
            if isinstance(params['data'],object) :

                document[label] = dict(document[label],**params['data'])
            else:
                document[label] = params['data']

        # if label not in document :
        #   document[label] = [] if isinstance(row,list) else {}
        # if isinstance(document[label],list):
        #   document[label].append(row)
        # else :
        #   document[label] = dict(document[label],**row)
        self.dbase.save_doc(document)
    def flush(self,**params) :

        size = params['size'] if 'size' in params else 0
        has_changed = False
        document = self.dbase.get(self.uid)
        for key in document:
            if key not in ['_id','_rev','_attachments'] :
                content = document[key]
            else:
                continue
            if isinstance(content,list) and size > 0:
                index = len(content) - size
                content = content[index:]
                document[key] = content

            else:
                document[key] = {}
            has_changed = True

        self.dbase.save_doc(document)
    def archive(self,params=None):
        document = self.dbase.get(self.uid)
        content = {}
        _doc = {}
        for id in document:
            if id in ['_id','_rev','_attachments'] :
                _doc[id] = document[id]
            else:
                content[id] = document[id]

        content = json.dumps(content)
        document= _doc
        now = str(datetime.today())

        name = '-'.join([document['_id'],now])+'.json'
        self.dbase.save_doc(document)
        self.dbase.put_attachment(document,content,name,'application/json')
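#
# A minimal usage sketch (hedged); connection values are placeholders:
#
# writer = CouchdbWriter(uri='http://localhost:5984',uid='demo@the-phi.com',dbname='monitor')
# writer.write(label='fixed',row=['a','b','c'])   #-- appends the row under the 'fixed' attribute
# writer.archive()                                #-- snapshots the document into a json attachment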
"""
|
|
||||||
This class acts as a factory to be able to generate an instance of a Reader/Writer
|
|
||||||
Against a Queue,Disk,Cloud,Couchdb
|
|
||||||
The class doesn't enforce parameter validation, thus any error with the parameters sent will result in a null Object
|
|
||||||
"""
|
|
||||||
class DataSourceFactory:
|
|
||||||
def instance(self,**args):
|
|
||||||
source = args['type']
|
|
||||||
params = args['args']
|
|
||||||
anObject = None
|
|
||||||
|
|
||||||
if source in ['HttpRequestReader','HttpSessionWriter']:
|
|
||||||
#
|
|
||||||
# @TODO: Make sure objects are serializable, be smart about them !!
|
|
||||||
#
|
|
||||||
aClassName = ''.join([source,'(**params)'])
|
|
||||||
|
|
||||||
|
|
||||||
else:
|
|
||||||
|
|
||||||
stream = json.dumps(params)
|
|
||||||
aClassName = ''.join([source,'(**',stream,')'])
|
|
||||||
try:
|
|
||||||
|
|
||||||
|
|
||||||
anObject = eval( aClassName)
|
|
||||||
#setattr(anObject,'name',source)
|
|
||||||
except Exception,e:
|
|
||||||
print ['Error ',e]
|
|
||||||
return anObject
|
|
||||||
"""
|
|
||||||
This class implements a data-source handler that is intended to be used within the context of data processing, it allows to read/write anywhere transparently.
|
|
||||||
The class is a facade to a heterogeneous class hierarchy and thus simplifies how the calling code interacts with the class hierarchy
|
|
||||||
"""
|
|
||||||
class DataSource:
|
|
||||||
def __init__(self,sourceType='Disk',outputType='Disk',params={}):
|
|
||||||
self.Input = DataSourceFactory.instance(type=sourceType,args=params)
|
|
||||||
self.Output= DataSourceFactory.instance(type=outputType,args=params)
|
|
||||||
def read(self,size=-1):
|
|
||||||
return self.Input.read(size)
|
|
||||||
def write(self,**args):
|
|
||||||
self.Output.write(**args)
|
|
||||||
#p = {}
#p['host'] = 'dev.the-phi.com'
#p['uid'] = 'nyemba@gmail.com'
#p['qid'] = 'repair'
#factory = DataSourceFactory()
#o = factory.instance(type='QueueReader',args=p)
#print o is None
#q = QueueWriter(host='dev.the-phi.com',uid='nyemba@gmail.com')
#q.write(object='steve')
#q.write(object='nyemba')
#q.write(object='elon')