You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			73 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			73 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
| """
 | |
| This file is a wrapper around pandas' built-in functionality for handling character-delimited files
 | |
| """
 | |
| import pandas as pd
 | |
| import numpy as np
 | |
| import os
 | |
class File:
    """
    Base class holding the settings shared by the character-delimited file handlers.

    Attributes set by the constructor:
        path        location of the file (None when not provided)
        delimiter   field separator (',' when not provided)
        _chunksize  number of rows per chunk as an int, or None when not provided
    """

    def __init__(self, **params):
        """
        @param  path        path of the file to be read or written (optional)
        @param  delimiter   field separator, defaults to ','
        @param  chunksize   optional chunk size; converted with int()
        """
        self.path = params.get('path')
        self.delimiter = params.get('delimiter', ',')
        _chunksize = params.get('chunksize')
        # An explicit chunksize=None means "no chunking"; int(None) would raise.
        self._chunksize = None if _chunksize is None else int(_chunksize)

    def isready(self):
        """
        @return True when a path was provided and it exists on disk, False otherwise
        """
        # os.path.exists(None) raises TypeError, so a missing path is reported
        # as "not ready" instead of crashing the caller.
        return self.path is not None and os.path.exists(self.path)

    def meta(self, **_args):
        """
        Placeholder for file metadata; this base implementation returns an empty list.
        """
        return []
 | |
| 	
 | |
class Reader(File):
    """
    This class is designed to read data from disk (location on hard drive)
    @pre : isready() == True
    """

    def __init__(self, **_args):
        super().__init__(**_args)

    def _stream(self, path, delimiter=None):
        """
        Generator yielding the segments produced by pandas.read_csv when it is
        given the configured chunk size.

        @param  path        path of the file to read
        @param  delimiter   field separator; falls back to self.delimiter when None
        """
        _delimiter = self.delimiter if delimiter is None else delimiter
        reader = pd.read_csv(path, sep=_delimiter, chunksize=self._chunksize, low_memory=False)
        yield from reader

    def read(self, **args):
        """
        Read the file, either in one call or as a stream of chunks.

        @param  path        overrides self.path for this call (optional)
        @param  delimiter   overrides self.delimiter for this call (optional)
        @return the result of pandas.read_csv when no chunk size is set,
                otherwise a generator yielding one segment at a time
        """
        _path = args.get('path', self.path)
        # The per-call delimiter used to be computed and then ignored;
        # it is now actually handed to pandas on both code paths.
        _delimiter = args.get('delimiter')
        if _delimiter is None:
            _delimiter = self.delimiter

        if not self._chunksize:
            return pd.read_csv(_path, sep=_delimiter)
        return self._stream(_path, _delimiter)

    def stream(self, **args):
        """
        Not implemented: always raises.
        """
        raise Exception("streaming needs to be implemented")
 | |
class Writer(File):
    """
    This class writes output to disk in a designated location.
    The data is written with its to_csv method, using the configured
    delimiter (',' unless another one is provided) and write mode
    ('w' unless another one is provided).
    """

    def __init__(self, **_args):
        """
        @param  mode    file open mode handed to to_csv, defaults to 'w'
        Remaining keyword arguments are forwarded to File.__init__.
        """
        super().__init__(**_args)
        self._mode = _args.get('mode', 'w')

    def write(self, info, **_args):
        """
        This function writes data to a designated file
        @param  info        object exposing to_csv (e.g. a pandas DataFrame)
        @param  delimiter   overrides self.delimiter for this call (optional)
        @param  path        overrides self.path for this call (optional)
        @param  mode        overrides the configured write mode for this call (optional)

        Errors raised while writing are printed and not propagated (best effort).
        """
        _delim = _args.get('delimiter', self.delimiter)
        _path = _args.get('path', self.path)
        _mode = _args.get('mode', self._mode)
        try:
            # When appending to a file that already holds data, the header row
            # is already there; writing it again would inject it as a record.
            _appending = (
                _path is not None
                and str(_mode).startswith('a')
                and os.path.exists(_path)
                and os.path.getsize(_path) > 0
            )
            # The mode used to be resolved but never passed on, so every
            # write overwrote the file regardless of the requested mode.
            info.to_csv(_path, index=False, sep=_delim, mode=_mode, header=not _appending)
        except Exception as e:
            # Deliberately best effort: report the problem and keep going.
            print(e)
