Source code for kClusterLib.usage

from db_connector import fetchRAWData
from kcTools import * #centerVectors, prettyPrint, getInterClusterS, loadKCFromDisk, saveKCToDisk, removeFaultySensors
from filterV import RealTimeFilter
import numpy as np


# ---------------------------------------------------------------------------
# Runtime switches read by createClusters()
# ---------------------------------------------------------------------------
# When set, createClusters() first tries to load a previously saved cluster
# from disk (loadKCFromDisk) instead of going straight to the database.
demoFlag = False
# When set, the cobalt matrix is passed row by row through RealTimeFilter.
filteringEnabled = True
# When set, the cleaned matrices are run through centerVectors().
MeanCenteringEnabled = True

# ---------------------------------------------------------------------------
# Default settings (used when the matching keyword argument is not supplied)
# ---------------------------------------------------------------------------
SI = 0.5        # default for the 'si' keyword
KMAX = 10       # default for the 'kmax' keyword
NPASS = 2       # default for the 'npass' keyword

# Database connection defaults.
# NOTE(review): credentials are hard-coded in source; consider moving them to
# configuration or environment variables.
DB_HOST = 'localhost'
DB_USERNAME = 'analyst'
DB_PASSWD = 'chemlab'
DB_NAME = 'EXPeriment'
# ---------------------------------------------------------------------------






def _validRowMask(mat):
    """Return a boolean vector marking the usable rows of a 2-D matrix.

    A row is rejected when its smallest entry is negative or NaN. NaN
    propagates through ``np.min``, so a single NaN anywhere in the row is
    enough to reject it.
    """
    rowMin = np.min(np.asarray(mat), axis=1)
    return ~((rowMin < 0) | np.isnan(rowMin))


def createClusters(**kwargs):
    """Create a fresh cluster from database data and save it to disk.

    Fetches the ``sensor_co`` and ``sensor_v`` tables, removes faulty
    sensors, masks faulty rows, optionally filters and mean-centers the
    data, runs the optimal-cluster search, merges singleton clusters and
    stores the result with ``saveKCToDisk``.

    Parameters
    ----------
    npass : int (optional)
        Passed to ``getOptimalCluster`` as ``npass`` (number of random seed
        initializations of k-means). Default is ``NPASS``.
    si : float (optional)
        Separation parameter passed to ``getOptimalCluster``.
        Default is ``SI``.
    kmax : int (optional)
        Maximum number of clusters passed to ``getOptimalCluster``.
        Default is ``KMAX``.
    start_idx : int (optional)
        Start index for selecting values from the database. Default is 1.
    end_idx : int (optional)
        End index for selecting values from the database. Default is 4000.
    db : str (optional)
        Database name. Default is ``DB_NAME``.
    db_host : str (optional)
        Database host. Default is ``DB_HOST``.
    u_name : str (optional)
        Database user name. Default is ``DB_USERNAME``.
    u_pass : str (optional)
        Database password. Default is ``DB_PASSWD``.

    Returns
    -------
    The value returned by ``saveKCToDisk`` (presumably the name under which
    the results were stored -- confirm against kcTools).
    """
    npass = kwargs.get('npass', NPASS)
    si = kwargs.get('si', SI)
    kmax = kwargs.get('kmax', KMAX)
    sidx = kwargs.get('start_idx', 1)
    eidx = kwargs.get('end_idx', 4000)
    db = kwargs.get('db', DB_NAME)
    db_host = kwargs.get('db_host', DB_HOST)
    user = kwargs.get('u_name', DB_USERNAME)
    u_pass = kwargs.get('u_pass', DB_PASSWD)

    if demoFlag:
        kcluster = loadKCFromDisk(debug=False)
        if isinstance(kcluster, (np.ndarray, np.generic)):
            # A stored cluster was loaded successfully: show it and stop.
            prettyPrint(kcluster)
            # NOTE(review): the original indentation of this exit was lost;
            # it is assumed to belong to the successful-load branch so that
            # a failed load falls through to a fresh computation -- confirm.
            # SystemExit is what the interactive-only exit() helper raises.
            raise SystemExit

    # Both tables are fetched over the same index window and connection.
    fetchArgs = dict(debug=False,
                     db_host=db_host,
                     dbase=db,
                     idx_start=sidx,
                     idx_end=eidx,
                     user=user,
                     u_pass=u_pass)
    matCo, labelCo = fetchRAWData(table='sensor_co', **fetchArgs)
    matV, labelV = fetchRAWData(table='sensor_v', **fetchArgs)

    # ------ 1. Faulty SPND elimination ------ #
    # Faulty SPNDs have 25% or more faulty values.
    matCo, labelCo = removeFaultySensors(matCo, labelCo, 0, 25, stype='Co-')
    matV, labelV = removeFaultySensors(matV, labelV, 0, 25, stype='V-')

    # ------ 2. Faulty value mask creation ------ #
    # A row is kept only when it is valid in BOTH the cobalt and the
    # vanadium matrix, so the two stay row-aligned after cleaning.
    boolMask = _validRowMask(matCo) & _validRowMask(matV)

    # ------ 3. Filtering of data ------ #
    # Only the cobalt matrix is filtered. The filter runs over every row in
    # order (before faulty rows are dropped); the first row is flagged as
    # the initial sample.
    if filteringEnabled:
        filtered = np.zeros(shape=matCo.shape)
        filtered[0, :] = RealTimeFilter(matCo[0, :], isInitial=True)
        for i in range(1, matCo.shape[0]):
            filtered[i, :] = RealTimeFilter(matCo[i, :])
        matCo = filtered

    # ------ 4. Cleaning data ------ #
    cleanMatCo = matCo[boolMask, :]
    cleanMatV = matV[boolMask, :]

    # ------ 5. Mean centering ------ #
    # WARNING: rows are not deleted based on timestamps.
    if MeanCenteringEnabled:
        matCo, meanCo, varCo = centerVectors(cleanMatCo)
        matV, meanV, varV = centerVectors(cleanMatV)
    else:
        # Previously this path left the mean/variance names unbound
        # (NameError below) and ignored the cleaned matrices.
        matCo, matV = cleanMatCo, cleanMatV
        # NOTE(review): zero offset / unit scale placeholders per column;
        # assumes saveKCToDisk accepts plain lists here -- confirm.
        meanCo, varCo = [0.0] * matCo.shape[1], [1.0] * matCo.shape[1]
        meanV, varV = [0.0] * matV.shape[1], [1.0] * matV.shape[1]

    # ------ 6. Concatenation of results (cleaned matrices) ------ #
    combinedMat = np.concatenate((matCo, matV), axis=1)
    combinedLabels = list(labelCo) + list(labelV)
    # NOTE(review): '+' concatenates only if centerVectors returns lists;
    # with ndarrays it would add element-wise -- confirm the return type.
    combinedMean = meanCo + meanV
    combinedVar = varCo + varV
    r, c = combinedMat.shape
    # NOTE(review): DEBUG is not defined in this file; presumably it comes
    # from the kcTools star import -- confirm.
    if DEBUG:
        print("DEBUG: r,c = {},{}".format(r, c))

    # ------ 7.b Clustering ------ #
    kcluster, _error, _freq = getOptimalCluster(combinedMat, si, kmax,
                                                npass=npass)

    # ------ 7.a Singleton cluster merging ------ #
    # Keep merging until no singleton cluster is reported.
    while isSingletonCluster(kcluster):
        kcluster = mergeSingletonCluster(kcluster, combinedMat)

    # ------ 8. Save results to disk ------ #
    name = saveKCToDisk(kcluster, combinedMat, combinedLabels,
                        combinedMean, combinedVar)
    return name


if __name__ == '__main__':
    createClusters(kmax=10, npass=10, start_idx=5, end_idx=13000, si=0.3)