Source code for kClusterLib.pca

import numpy as np
from kcTools import loadKCFromDisk, separateClusters
from kcTools import halt
from kcTools import savePcaData
from usage import createClusters
import time

# Debug message printing control: set DEBUG = True to enable verbose output.
DEBUG = False




def extractRelations(spnd_ids, inpMat, threshVarCap):
    '''
    Extracts the PCA relation model for one cluster of SPNDs: the
    eigenvectors spanning the low-variance subspace and the covariance
    matrix of the corresponding residuals.

    Parameters
    ----------
    spnd_ids : list
        List of SPND IDs that belong to the same cluster.
    inpMat : ndarray
        Matrix containing the SPND values (one column per SPND).
    threshVarCap : float
        Threshold, given as a fraction of the total variance, used to
        select the eigenvalues.

    Returns
    -------
    relationEvectors : ndarray
        Matrix of the selected eigenvectors (one eigenvector per row).
    covResidualMat : ndarray
        Covariance matrix of the residuals.

    Example
    -------
    >>> cluster, inpMat, labels, combinedMean, combinedVar = loadKCFromDisk()
    >>> clusterlist = separateClusters(cluster)
    >>> threshVarCap = 0.01
    >>> for i in range(max(cluster) + 1):
    ...     a, b = extractRelations(clusterlist[i], inpMat, threshVarCap)
    ...     print a, b
    '''
    # Each cluster holds a set of sensor IDs; build a matrix whose rows are
    # the sample series of those sensors.
    timeinstances, spnds = np.shape(np.matrix(inpMat))
    cluster = spnd_ids
    temp = []
    for i in range(len(cluster)):
        x = inpMat[:, cluster[i]]
        temp.append(np.ravel(x))
    clusterMat = np.matrix(np.array(temp))      # shape: (sensors, timeinstances)

    covMat = (clusterMat * clusterMat.transpose()) / timeinstances
    #print "Covariance matrix:\n", covMat

    evalues, evectors = np.linalg.eigh(covMat)
    evectors = np.asarray(evectors).transpose()  # each row is now an eigenvector
    #print "evectors:\n", evectors

    forceFlag = False
    if np.sum(evalues) == 0:
        print "Error: sum of eigenvalues is zero"
        halt()
        forceFlag = True
        print "WARNING : further calculations will be based on sum(evalues)=1 !"

    idx = evalues.argsort()            # sort in ascending order
    #idx = evalues.argsort()[::-1]     # for sorting in descending order
    evalues = evalues[idx]
    evectors = evectors[idx, :]
    #print "Eigenvalues:\n", evalues, "\nEigenvectors:\n", evectors
    if DEBUG:
        print "\nNumber of Eigenvalues:", len(evalues)

    # Keep the smallest eigenvalues whose cumulative share of the total
    # variance stays below threshVarCap; their eigenvectors describe the
    # (near-)linear relations within the cluster.
    relationEvectors = []
    relationEvalues = []
    sumevalues = 0
    if not forceFlag:
        sumofallevalues = np.sum(evalues)
    else:
        sumofallevalues = 1
    for i in range(len(evalues)):
        sumevalues += evalues[i]
        r = sumevalues / sumofallevalues
        #print "ratio", r
        if r <= threshVarCap:
            relationEvectors.append(evectors[i])
            relationEvalues.append(evalues[i])
        else:
            break

    if DEBUG:
        print "Number of valid relations:", len(relationEvectors)
    if len(relationEvectors) == 0:
        relationEvectors = evectors[0]
        relationEvalues = evalues[0]
        print "No valid relation has been found, so the smallest eigenvalue and its eigenvector have been taken!"

    relationEvectors = np.matrix(np.array(relationEvectors))
    if DEBUG:
        print "covMat size: ", covMat.shape

    residualMat = relationEvectors * clusterMat
    #print "residualMat:\n", residualMat

    # Time profiling showed that projecting covMat (method II below) is faster
    # than computing residualMat * residualMat.transpose() / timeinstances.
    covResidualMat = relationEvectors * covMat * relationEvectors.transpose()

    if DEBUG:
        print "relationEvectors(A) size:", relationEvectors.shape
        print "covResidualMat(V) size: ", covResidualMat.shape
    #print "covResidualMat(V):\n", covResidualMat
    #print "inv of covResidualMat(V):\n", np.linalg.inv(covResidualMat)
    return relationEvectors, covResidualMat
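
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the library): one way the pair (A, V)
# returned by extractRelations() could be applied to a new sample vector.
# The helper name `scoreSample` and the Mahalanobis-style statistic are
# assumptions chosen for illustration only; the consistency check actually
# used elsewhere in kClusterLib may differ.
#
# A (relationEvectors) projects a sample onto the low-variance directions of
# the cluster, and V (covResidualMat) is the covariance of those residuals,
# so  d = r' * inv(V) * r  with  r = A * x  behaves like a squared
# Mahalanobis distance of the residual: a large d suggests the sample
# violates the linear relations learned for the cluster.
def scoreSample(relationEvectors, covResidualMat, sample):
    '''Mahalanobis-style residual score of one sample vector (sketch only).'''
    x = np.matrix(np.ravel(sample)).transpose()            # column vector, shape (sensors, 1)
    r = relationEvectors * x                                # residual, shape (relations, 1)
    d = r.transpose() * np.linalg.inv(covResidualMat) * r   # 1x1 matrix
    return float(d)
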
def makeModels(eigenThreshold, clusterMap, dataMat):
    '''
    Creates PCA models for all clusters and stores them on disk.

    Parameters
    ----------
    eigenThreshold : float
        Threshold (fraction) used to filter out the eigenvalues.
    clusterMap : list
        Mapping of SPND <--> cluster ID.
    dataMat : ndarray
        Sensor data matrix on which the models are created.

    Returns
    -------
    fname : string
        Name of the file created on disk.
    '''
    threshVarCap = eigenThreshold
    inpMat = dataMat
    clusterList = separateClusters(clusterMap)

    totalA = []
    totalB = []
    for i in range(max(clusterMap) + 1):
        a, b = extractRelations(clusterList[i], inpMat, threshVarCap)
        totalA.append(a)
        totalB.append(b)
    return savePcaData(totalA, totalB)
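
# ---------------------------------------------------------------------------
# Minimal usage sketch for makeModels(), kept commented out so it does not
# run at import time.  The synthetic inputs below are assumptions: in normal
# use clusterMap and dataMat come from loadKCFromDisk().  dataMat has one
# column per SPND, and clusterMap[i] is the cluster ID of SPND i.
#
# demoData = np.random.randn(1000, 4)    # 1000 time instances, 4 SPNDs
# demoClusterMap = [0, 0, 1, 1]          # SPNDs 0,1 -> cluster 0; SPNDs 2,3 -> cluster 1
# fname = makeModels(0.01, demoClusterMap, demoData)
# print "PCA model written to:", fname
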
if __name__ == '__main__':
    clusterMap, dataMat, labels, combinedMean, combinedVar = loadKCFromDisk()
    makeModels(0.01, clusterMap, dataMat)
    exit()

    # Older standalone test path; unreachable because of the exit() above.
    #createClusters(npass=8)
    cluster, inpMat, labels, combinedMean, combinedVar = loadKCFromDisk()
    if DEBUG:
        print "Total number of clusters: ", max(cluster) + 1
    clusterlist = separateClusters(cluster)
    #if DEBUG: print "Cluster result:\n", clusterlist
    threshVarCap = 0.01
    #np.set_printoptions(formatter={'float': lambda x: "{0:0.0f}".format(x)})
    for i in range(max(cluster) + 1):
        a, b = extractRelations(clusterlist[i], inpMat, threshVarCap)
        print "Iteration:\t\t\t{}".format(i)
        print a, b
        #print "For", i, "th cluster Covariance Residual Matrix:\n", b
        #print "relationEvectors\n", a