|
|
@ -166,7 +166,9 @@ class Components :
|
|
|
|
:param values array of values to be approximated
|
|
|
|
:param values array of values to be approximated
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
if values.dtype in [int,float] :
|
|
|
|
if values.dtype in [int,float] :
|
|
|
|
r = np.random.dirichlet(values)
|
|
|
|
#
|
|
|
|
|
|
|
|
# @TODO: create bins?
|
|
|
|
|
|
|
|
r = np.random.dirichlet(values+.001) #-- dirichlet doesn't work on values with zeros
|
|
|
|
x = []
|
|
|
|
x = []
|
|
|
|
_type = values.dtype
|
|
|
|
_type = values.dtype
|
|
|
|
for index in np.arange(values.size) :
|
|
|
|
for index in np.arange(values.size) :
|
|
|
@ -222,7 +224,7 @@ class Components :
|
|
|
|
dtype = str
|
|
|
|
dtype = str
|
|
|
|
name = _item['name']
|
|
|
|
name = _item['name']
|
|
|
|
novalue = -1
|
|
|
|
novalue = -1
|
|
|
|
if _item['type'] == 'INTEGER' :
|
|
|
|
if _item['type'] in ['INTEGER','NUMERIC']:
|
|
|
|
dtype = np.int64
|
|
|
|
dtype = np.int64
|
|
|
|
|
|
|
|
|
|
|
|
elif _item['type'] == 'FLOAT' :
|
|
|
|
elif _item['type'] == 'FLOAT' :
|
|
|
@ -296,11 +298,11 @@ class Components :
|
|
|
|
# - The original dataset has all the fields except those that need to be synthesized
|
|
|
|
# - The original dataset has all the fields except those that need to be synthesized
|
|
|
|
#
|
|
|
|
#
|
|
|
|
|
|
|
|
|
|
|
|
_df = _df[list(set(_df.columns) - set(skip_columns))]
|
|
|
|
_df = _df[list(set(_df.columns) - set(skip_columns))].copy()
|
|
|
|
if x_cols :
|
|
|
|
if x_cols :
|
|
|
|
for _col in x_cols :
|
|
|
|
for _col in x_cols :
|
|
|
|
if real_df[_col].unique().size > 0 :
|
|
|
|
if real_df[_col].unique().size > 0 :
|
|
|
|
_df[_col] = self.approximate(real_df[_col].fillna(-1))
|
|
|
|
_df[_col] = self.approximate(real_df[_col])
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
_df[_col] = -1
|
|
|
|
_df[_col] = -1
|
|
|
|
|
|
|
|
|
|
|
|