from db_connector import fetchRAWData
from kcTools import * #centerVectors, prettyPrint, getInterClusterS, loadKCFromDisk, saveKCToDisk, removeFaultySensors
from filterV import RealTimeFilter
import numpy as np
# ---------------------------------------------------------------
# Runtime switches read by createClusters()
# ---------------------------------------------------------------
# When True, createClusters() first tries to load a previously saved
# cluster from disk instead of going straight to the database.
demoFlag = False
# When True, the cobalt matrix is passed row by row through RealTimeFilter.
filteringEnabled = True
# When True, the cleaned matrices are passed through centerVectors.
MeanCenteringEnabled = True

# ---------------------------------------------------------------
# Default settings (used when the matching keyword argument is
# not supplied to createClusters())
# ---------------------------------------------------------------
# Clustering defaults: separation parameter, maximum cluster count,
# and number of k-means passes.
SI = 0.5
KMAX = 10
NPASS = 2

# Database connection defaults.
# NOTE(review): the password is stored in plain text in the source;
# consider moving it to configuration or the environment.
DB_HOST = "localhost"
DB_USERNAME = "analyst"
DB_PASSWD = "chemlab"
DB_NAME = "EXPeriment"
def _validRowMask(mat):
    '''
    Return a boolean array with one entry per row of ``mat``: True when the
    row holds neither a negative value nor a NaN, False otherwise.
    '''
    # np.min propagates NaN, so one row-wise minimum is enough to detect a
    # negative reading or a NaN anywhere in the row.
    rowMin = np.min(np.asarray(mat), axis=1)
    return ~((rowMin < 0) | np.isnan(rowMin))


def createClusters(**kwargs):
    '''
    Creates a fresh cluster on sensor data fetched from the database and
    saves the result to disk. Takes keyword-arguments; unrecognised keywords
    are ignored.

    Parameters
    ----------
    npass : int (optional)
        Input to k-means algorithm (number of random seed initializations).
        Default is NPASS (2).
    start_idx : int (optional)
        Start index for selecting values from database. Default is 1.
    end_idx : int (optional)
        End index for selecting values from database. Default is 4000.
    si : float (optional)
        Separation parameter input to k-means clustering. Default is SI (0.5).
    kmax : int (optional)
        Maximum number of clusters. Input to the optimum cluster finding
        function. Default is KMAX (10).
    db : str (optional)
        Database name. Default is DB_NAME.
    db_host : str (optional)
        Database host. Default is DB_HOST.
    u_name : str (optional)
        Database user name. Default is DB_USERNAME.
    u_pass : str (optional)
        Database password. Default is DB_PASSWD.

    Returns
    -------
    The value returned by saveKCToDisk (presumably the name under which the
    cluster was saved -- confirm against kcTools).

    Raises
    ------
    SystemExit
        In demo mode (``demoFlag`` True), after a saved cluster has been
        loaded and printed.
    '''
    npass = kwargs.get('npass', NPASS)
    si = kwargs.get('si', SI)
    kmax = kwargs.get('kmax', KMAX)
    sidx = kwargs.get('start_idx', 1)
    eidx = kwargs.get('end_idx', 4000)
    db = kwargs.get('db', DB_NAME)
    db_host = kwargs.get('db_host', DB_HOST)
    user = kwargs.get('u_name', DB_USERNAME)
    u_pass = kwargs.get('u_pass', DB_PASSWD)

    if demoFlag:
        kcluster = loadKCFromDisk(debug=False)
        if isinstance(kcluster, (np.ndarray, np.generic)):
            # kcluster was OK
            prettyPrint(kcluster)
            # NOTE(review): the original indentation was lost; it is assumed
            # that the process exits only after a successful load and
            # otherwise falls through to build a new cluster -- confirm.
            raise SystemExit

    # Both tables are read with the same connection and index window.
    fetchArgs = dict(debug=False,
                     db_host=db_host,
                     dbase=db,
                     idx_start=sidx,
                     idx_end=eidx,
                     user=user,
                     u_pass=u_pass)
    matCo, labelCo = fetchRAWData(table='sensor_co', **fetchArgs)
    matV, labelV = fetchRAWData(table='sensor_v', **fetchArgs)

    #------ 1. Faulty SPND Elimination-------#
    # Faulty SPND's have 25% or more faulty values
    matCo, labelCo = removeFaultySensors(matCo, labelCo, 0, 25, stype='Co-')
    ## Processing Vanadium RAW data
    matV, labelV = removeFaultySensors(matV, labelV, 0, 25, stype='V-')

    #------ 2. Faulty value mask creation ----#
    # A row is kept only when neither the Cobalt nor the Vanadium matrix has
    # a negative or NaN value in it.
    boolMask = _validRowMask(matCo) & _validRowMask(matV)

    #----- 3. Filtering of data ---------------#
    if filteringEnabled:
        r, c = matCo.shape
        resultMat = np.zeros(shape=(r, c))
        # The first row is passed with isInitial=True; the remaining rows
        # are fed in their original order.
        resultMat[0, :] = RealTimeFilter(matCo[0, :], isInitial=True)
        for i in range(1, r):
            resultMat[i, :] = RealTimeFilter(matCo[i, :])
        matCo = resultMat

    #----- 4. Cleaning Data ------------------#
    cleanMatCo = matCo[boolMask, :]
    cleanMatV = matV[boolMask, :]

    #----- 5. Mean centering -----------------#
    if MeanCenteringEnabled:
        matCo, meanCo, varCo = centerVectors(cleanMatCo)
        matV, meanV, varV = centerVectors(cleanMatV)
    else:
        # Without centering, cluster the cleaned data as-is. Previously this
        # path used the uncleaned matrices and left the mean/variance names
        # unbound.
        matCo, matV = cleanMatCo, cleanMatV
        # NOTE(review): placeholder statistics (mean 0, variance 1) as plain
        # lists; assumes centerVectors returns list-like per-column values
        # that `+` concatenates below -- confirm against kcTools.
        meanCo = [0.0] * matCo.shape[1]
        varCo = [1.0] * matCo.shape[1]
        meanV = [0.0] * matV.shape[1]
        varV = [1.0] * matV.shape[1]
    ## WARNING: we are not deleting rows based on timestamps

    #----- 6. Concatenation of results( Cleaned matrices )
    combinedMat = np.concatenate((matCo, matV), axis=1)
    combinedLabels = list(labelCo) + list(labelV)
    combinedMean = meanCo + meanV
    combinedVar = varCo + varV
    # DEBUG is not defined in this file; it may come from the kcTools star
    # import. Treat it as False when absent instead of raising NameError.
    if globals().get('DEBUG', False):
        r, c = combinedMat.shape
        print("DEBUG: r,c = {},{}".format(r,c))

    #----- 7. Clustering ---------------------#
    kcluster, error, freq = getOptimalCluster(combinedMat, si, kmax, npass=npass)

    #----- 8. Singleton cluster merging ------#
    # Keep merging until isSingletonCluster reports no singleton left.
    while isSingletonCluster(kcluster):
        kcluster = mergeSingletonCluster(kcluster, combinedMat)

    #----- 9. Save Results to disk -----------#
    name = saveKCToDisk(kcluster, combinedMat, combinedLabels, combinedMean, combinedVar)
    return name
if __name__ == '__main__':
    # Script entry point: build clusters with explicit settings instead of
    # the module defaults.
    createClusters(kmax=10, npass=10, start_idx=5, end_idx=13000, si=0.3)