# Source code for kClusterLib.pca
import numpy as np
from kcTools import loadKCFromDisk, separateClusters
from kcTools import halt
from kcTools import savePcaData
from usage import createClusters
import time
# debug message printing control
DEBUG = False#True
[docs]def extractRelations( spnd_ids, inpMat, threshVarCap):
'''
Makes pca model and saves it in a '.np' file.
Parameters
----------
spnd_ids : list
list of SPND IDs that belong to same cluster
inpMat : ndArray
Matrix containing the SPND values
threshVarCap : float
Percent Threshold (Given as fraction) to select the Eigenvalues.
Returns
-------
relationEvectors : ndArray
Singular Matrix
covResidualMat : ndArray
covariance Matrix
Example
-------
>>> cluster, inpMat, labels, combinedMean, combinedVar = loadKCFromDisk()
>>> clusterlist=separateClusters(cluster)
>>> threshVarCap=0.01
>>> for i in range(max(cluster)+1):
>>> a,b = extractRelations(clusterlist[i],inpMat,threshVarCap)
>>> print a,b
'''
#each cluster is having some sensors' name
#callcluster function form a matrix which
#is having the sample data corresponding
#to those sensors
timeinstances,spnds =np.shape(np.matrix(inpMat))
clusterMat=[]
cluster = spnd_ids
temp=[]
for i in range(len(cluster)):
x = inpMat[:,cluster[i]]
temp.append(x)
clusterMat.append(temp)
clusterMat= np.array(clusterMat)
clusterMat = np.matrix(clusterMat)
covMat= (clusterMat * clusterMat.transpose()) / timeinstances
#print "Covariance matrix:\n",covMat
evalues, evectors = np.linalg.eigh(covMat)
evectors = evectors.transpose() ##...each column is the eigenvector
#print "evectors:\n", evectors
forceFlag = False
if np.sum(evalues) == 0:
print "Error: sum of eigenvalues is zero"
halt()
## flag
forceFlag = True
print "WARNING : further calculations will be based on sum(evalues)=1 !"
idx = evalues.argsort()[0::] #for sorting in ascending order
#idx = evalues.argsort()[::-1] #for sorting in descending order
evalues= evalues[idx]
evectors= evectors[:,idx]
#print "Eigenvalues:\n", evalues,"\nEigenvectors:\n", evectors
if DEBUG:
print "\nNumber of Eigenvalues:", len(evalues)
relationEvectors=[]
relationEvalues=[]
sumevalues = 0
if not forceFlag:
sumofallevalues = np.sum(evalues)
else:
sumofallevalues = 1
for i in range(len(evalues)):
sumevalues +=evalues[i]
r = sumevalues/sumofallevalues
#print "ratio",r
if r<=threshVarCap:
relationEvectors.append(evectors[i])
relationEvalues.append(evalues[i])
else:
break
if DEBUG:
print "Number of validRelations:", len(np.array(relationEvectors))
if len(np.array(relationEvectors)) == 0:
relationEvectors=evectors[0]
relationEvalues=evalues[0]
print "No valid relation has been found thus the smallest eigenvalue & corresponding eigenvector have been taken!!!"
relationEvectors = np.array(relationEvectors)
relationEvectors = np.matrix(relationEvectors)
if DEBUG:
print "covMat size: ",covMat.shape
residualMat = relationEvectors*clusterMat
#print "residualMat:\n",residualMat
# time profiling result: method II works faster
# t0 = time.time()
# covResidualMat1 = residualMat*residualMat.transpose()/timeinstances
# t1 = time.time()
# print "timetaken for calculation1 : {}".format(t1-t0)
# t1 = time.time()
covResidualMat= relationEvectors*covMat*relationEvectors.transpose()
# print "timetaken for calculation2 : {}".format(time.time()-t1)
if DEBUG:
print "relationEvectors(A) size:",relationEvectors.shape
print "covResidualMat(V) size: ", covResidualMat.shape
#print "covResidualMat(V):\n",covResidualMat
#print "covResidualMat1 size: ", covResidualMat1.shape
#print "covResidualMat1:\n",covResidualMat1
#print "inv of covResidualMat(V):\n",np.linalg.inv(covResidualMat)
return relationEvectors, covResidualMat
def makeModels(eigenThreshold, clusterMap, dataMat):
    '''
    Creates PCA models for every cluster and stores them on disk.

    Parameters
    ----------
    eigenThreshold : float
        Fractional variance threshold used to filter the eigenvalues.
    clusterMap : list
        Mapping of SPND index <--> cluster ID.
    dataMat : ndarray
        Sensor data matrix on which the models are created.

    Returns
    -------
    fname : string
        Name of the file created on disk by savePcaData.
    '''
    threshVarCap = eigenThreshold
    inpMat = dataMat
    clusterList = separateClusters(clusterMap)
    # One model per cluster: totalA holds the eigenvector matrices,
    # totalB the corresponding residual covariance matrices.
    totalA = []
    totalB = []
    for i in range(max(clusterMap) + 1):
        a, b = extractRelations(clusterList[i], inpMat, threshVarCap)
        totalA.append(a)
        totalB.append(b)
    return savePcaData(totalA, totalB)
if __name__ == '__main__':
clusterMap, dataMat, labels, combinedMean, combinedVar = loadKCFromDisk()
makeModels(0.01, clusterMap, dataMat)
exit()
#createClusters(npass=8)
cluster, inpMat, labels, combinedMean, combinedVar = loadKCFromDisk()
if DEBUG:
print "Total number of clusters: ",max(cluster)+1
clusterlist=separateClusters(cluster)
#if DEBUG:
print "Cluster result:\n",clusterlist
threshVarCap=0.01
#np.set_printoptions(formatter={'float': lambda x: "{0:0.0f}".format(x)})
for i in range(max(cluster)+1):
a,b = extractRelations(clusterlist[i],inpMat,threshVarCap)
print "Iteration:\t\t\t{}".format(i)
print a,b #print "For",i,"th cluster Covariance Residual Matrix:\n",b#*1000000,"\n"
#print "relationEvectors\n",a