import numpy as np
import math
import logging
import matplotlib.pyplot as plt
from time import strftime
from scipy.stats import chi2
from db_connector import fetchRAWData
from usage import createClusters
from kcTools import loadPcaData, loadKCFromDisk, separateClusters, isCobalt
from pca import extractRelations
from scipy.signal import tf2ss
previousSpnds = 0
previousFiltered = 0
count = 0
DEBUG = True
k = 1  # 1.415*10**-20/60  A.cm2.min.n-1
tau_p = 23.68/60  # min
tau_d = 315.0/60  # min
T = 1  # sampling period is 1 min
A,B,C,D = tf2ss([k*tau_p, k], [tau_d, 1])
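# (A, B, C, D) above is a state-space realization of the dynamic-compensation
# transfer function H(s) = k*(tau_p*s + 1)/(tau_d*s + 1) given by the
# numerator/denominator coefficient lists passed to tf2ss.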
# if DEBUG:
# print "A: ",A
# print "B: ",B
# print "C: ",C
# print "D: ",D
global_ass = (2 + A*T) / (2 - A*T)
global_cbss = (C*B*T - A*D*T - 2*D) / (2 - A*T)
global_dss = (C*B*T - A*D*T + 2*D) / (2 - A*T)
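# Bilinear (Tustin) discretization with sampling period T: substituting
# s = (2/T)*(z - 1)/(z + 1) into G(s) = C*B/(s - A) + D yields
#     G(z) = (dss + cbss*z**-1) / (1 - ass*z**-1),
# i.e. the recurrence y[n] = ass*y[n-1] + cbss*x[n-1] + dss*x[n]
# applied below to filter the Cobalt SPND signals.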
#from filterV import ass, cbss, dss
fo = None
fo1 = None
logger = None
def configLOG():
    '''
    Configure the log file paths and the logging format.

    Parameters
    ----------
    None

    Returns
    -------
    None
    '''
global fo
global fo1
global logger
    logging.basicConfig(filename='grossError.csv', format='%(message)s', level=logging.DEBUG)
    logger = logging.getLogger(__name__)
    logger.debug('TIME STAMP,Ti,SPND,CLUSTER,MESSAGE,RAW DATA,BIAS,GAMMA,CHI SQUARE')
    fo = open("count.csv", "a+")
    print "Name of the file: ", fo.name
    print "Opening mode : ", fo.mode
    fo1 = open("window.csv", "a+")
    fo1.write('TIME STAMP,TI,SPND,CLUSTER,MESSAGE,WINDOWWIDTH,THRESHOLD\n')
def errorCheck(**kwargs):
    '''
    Uses the PCA model to validate the data.

    Parameters
    ----------
    start_idx : int, optional
        Start index for selecting values from the database. Default is 1.
    end_idx : int, optional
        End index for selecting values from the database. Default is 4000.
    db : str, optional
        Name of the database to connect to for fetching data. Default is 'EXPeriment'.
    db_host : str, optional
        IP of the machine running the MySQL server. Default is 'localhost'.
    u_name : str, optional
        User name for connecting to the remote MySQL server. Default is 'analyst'.
    u_pass : str, optional
        Password for connecting to the remote MySQL server. Default is 'chemlab'.
    '''
configLOG()
ass = global_ass[0][0]
cbss = global_cbss[0][0]
dss = global_dss[0][0]
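    ##..ass, cbss, dss are the scalar Tustin filter coefficients, extracted..##
    ##..from the 1x1 arrays computed at module level..##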
    ##..start and end indices..##
    sidx = kwargs.get('start_idx', 1)
    eidx = kwargs.get('end_idx', 4000)
    ##..database connection parameters..##
    db = kwargs.get('db', 'EXPeriment')
    db_host = kwargs.get('db_host', 'localhost')
    user = kwargs.get('u_name', 'analyst')
    u_pass = kwargs.get('u_pass', 'chemlab')
    ##..significance level for the chi-squared test..##
    alpha = 0.05
##..window test..##
windowWidth = 50
faultThreshold = 0.7
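    ##..the sliding-window test below declares an SPND persistently faulty..##
    ##..when more than faultThreshold (70%) of the last windowWidth (50)..##
    ##..samples were flagged..##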
##............Getting data from DB.........................##
    rawDataMatCo, colNamesCo = fetchRAWData(debug=False,
                                            dbase=db,
                                            db_host=db_host,
                                            table='sensor_co',
                                            user=user,
                                            u_pass=u_pass,
                                            idx_start=sidx,
                                            idx_end=eidx)
    rawDataMatV, colNamesV = fetchRAWData(debug=False,
                                          dbase=db,
                                          db_host=db_host,
                                          table='sensor_v',
                                          user=user,
                                          u_pass=u_pass,
                                          idx_start=sidx,
                                          idx_end=eidx)
rawDataMatCoV = np.concatenate((rawDataMatCo, rawDataMatV),axis=1)
combinedLabels = np.concatenate((colNamesCo,colNamesV),axis=1)
##...Fetching Cluster Data...##
cluster, inpMat, labels, combinedMean, combinedVar = loadKCFromDisk()
##...remove faulty spnd columns...##
    mask = [1 for i in range(len(combinedLabels))]
    for i in combinedLabels:
        if i not in labels:
            mask[list(combinedLabels).index(i)] = 0
            fo.write('Faulty sensors' + ',' + str(i) + '\n')
    bmask = np.array(mask, dtype=bool)
    cleanedLabels = combinedLabels[bmask]
    rawDataMatCoV = rawDataMatCoV[:, bmask]
##...rawDataMatCoV is the raw data after cleaning faulty SPNDs...##
    ##..uncomment to inject a test bias: rawDataMatCoV[0,18] += -20..##
    ##..SPNDs 121 to 132 have values less than one hundred..##
    rCoV, cCoV = rawDataMatCoV.shape
    print "Total number of clean SPNDs :", cCoV
    fo.write('No. of Rows in rawDataMatCoV' + ',' + str(rCoV) + '\n')
    fo.write('Total number of clean SPNDs' + ',' + str(cCoV) + '\n')
    mask = [isCobalt(i, cleanedLabels) for i in range(len(rawDataMatCoV[0, :]))]  ##..number of columns..##
    bmask = np.array(mask, dtype=bool)
    matCo = rawDataMatCoV[:, bmask]
    rCo, cCo = matCo.shape
    print "Total number of clean Cobalt SPNDs :", cCo
    fo.write('Total number of clean Cobalt SPNDs' + ',' + str(cCo) + '\n')
    cV = cCoV - cCo
    print "Total number of clean Vanadium SPNDs:", cV
    fo.write('Total number of clean Vanadium SPNDs' + ',' + str(cV) + '\n')
##..................................................................##
clusterlist = separateClusters(cluster)
print "Number of Clusters: ", len(clusterlist)
print "Cluster result:\n",clusterlist
    fo.write('Number of Clusters: ' + str(len(clusterlist)) + '\n')
    for i in range(len(clusterlist)):
        clusterlist[i] = np.array(clusterlist[i])
        fo.write('cluster ' + str(i) + ',' + '\n' + str(clusterlist[i]) + '\n')
    ##.........storing chi2.ppf values................##
    chi2ppf = []
    tempchi2 = []
    for n in range(cCoV):
        p = chi2.ppf(1 - alpha, n)
        tempchi2.append(p)
    chi2ppf.append(tempchi2)
    chi2ppf = np.array(chi2ppf)
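    ##..chi2ppf[0][n] is the (1 - alpha) quantile of the chi-squared..##
    ##..distribution with n degrees of freedom, so the table can be indexed..##
    ##..directly by the residual dimension r below (the n = 0 entry is unused)..##
    ##..E.g. for alpha = 0.05, chi2.ppf(0.95, 3) is about 7.815..##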
##...Make a window which will keep updating if there is any fault in any SPND...##
windowMat = np.zeros(shape = (windowWidth, cCoV))
## ........ LOAD PCA Model From Disk ........ ##
Amat = []
ResVarMat = []
am, rm = loadPcaData()
Amat.append(am)
ResVarMat.append(rm)
Amat = np.array(Amat)
ResVarMat = np.array(ResVarMat)
#print "**** HERE is AmaT:"
#print Amat.shape
#print type(Amat)
#print Amat
#
#print "**** HERE is ResVarMat:"
#print ResVarMat.shape
#print type(ResVarMat)
#print ResVarMat
    ###......alternative: rebuild the PCA model from the clusters.......##
    #threshVarCap = 0.01
    #Amat = []
    #ResVarMat = []
    #tempAmat = []
    #tempResVarMat = []
    #for clusterk in range(len(clusterlist)):
    #    a, resVar = extractRelations(clusterlist[clusterk], inpMat, threshVarCap)
    #    tempAmat.append(a)
    #    tempResVarMat.append(resVar)
    #Amat.append(tempAmat)
    #Amat = np.array(Amat)
    #ResVarMat.append(tempResVarMat)
    #ResVarMat = np.array(ResVarMat)
###.......Fault count initialization................##
spndFaultCount = np.zeros(shape = (1,cCoV))
clusterFaultCount = np.zeros(shape= (1, len(clusterlist)))
##...for filtering...##
previousCo = np.zeros(shape=(1,cCo))
previousFilteredCo = np.zeros(shape=(1,cCo))
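    ##..previousCo / previousFilteredCo carry the Cobalt filter state..##
    ##..(previous input x[n-1], previous output y[n-1]) across time steps..##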
for ti in range(eidx-sidx):
##...for each time ti...##
for clusterj in range(len(clusterlist)):
##...accessing clusters serially...##
            ##..Initializations..##
            presentData = []
            tempy = []
            x = []
            y = []
            tempPreFilCo = np.zeros(shape=(1, cCo))
            tempPreCo = np.zeros(shape=(1, cCo))
            flagIsNegative = False
            ##..initialization ENDS..##
            for spndk in range(len(clusterlist[clusterj])):
                ##..spndk traverses all the SPNDs in this particular cluster..##
                ##..spndId is the index of the currently selected sensor..##
                spndId = clusterlist[clusterj][spndk]
x = rawDataMatCoV[ti][spndId]
## x is the current value of kth SPND
if spndId <= cCo-1:
##...for Cobalt data filter it...##
if x<=0 or np.isnan(x):
x= previousCo[0][spndId]
y = ass*previousFilteredCo[0][spndId] + cbss*previousCo[0][spndId] + dss*x
tempPreFilCo[0][spndId] = y
tempPreCo[0][spndId] = x
else:
y = x
if x<=0 or np.isnan(x):
flagIsNegative = True
windowMat[windowWidth-1][spndId] = 1
if DEBUG:
print "Sensor output is zero/negative/'nan' for",\
ti,"th instance",\
spndId,"th SPND belongs to",\
clusterj,"'th cluster"
logger.debug(str(strftime("%Y-%m-%d %H:%M:%S"))+','+str(ti)+','+ str(spndId) + ',' + str(clusterj)+','+'Sensor output is zero/negative/nan' +',' +str(x))
                        ## Values are staged in temp variables rather than
                        ## written directly to the global filter state: a later
                        ## SPND in this cluster may turn out to be invalid, in
                        ## which case all values for the current cluster at
                        ## this time instance are discarded.
spndFaultCount[0][spndId] += 1
                z = (y - combinedMean[spndId]) / math.sqrt(combinedVar[spndId])
                tempy.append(z)
                ##..tempy holds the mean-centered and scaled sensor values..##
if flagIsNegative:
##...if any entry is zero/negative/'nan' then delete all...##
##...entries for *that* time instance for *that* cluster...##
tempy = []
presentData.append(tempy)
clusterFaultCount[0][clusterj] += 1
            else:
                presentData.append(tempy)
presentData = np.array(presentData)
#print "presentData",presentData
#print "tempPreCo\n",tempPreCo
#print "tempPreFilCo\n",tempPreFilCo
            if len(presentData[0]) != 0:
                ## All SPNDs of this cluster hold meaningful values at this
                ## time instance and have been centered, scaled, and (for Co)
                ## filtered. Store the Cobalt values in the global filter
                ## state for future filtering.
                for col in range(cCo):
                    if tempPreFilCo[0][col] != 0:
                        previousFilteredCo[0][col] = tempPreFilCo[0][col]
                    if tempPreCo[0][col] != 0:
                        previousCo[0][col] = tempPreCo[0][col]
## FETCHING Model
A = Amat[0][clusterj]
VarMatRes = ResVarMat[0][clusterj]
r, c = np.shape( A )
## CALCULATING RESIDUE r(t)
presentResidualData = A * presentData.transpose()
presentResidualData = np.array( presentResidualData )
gamma = presentResidualData.transpose() * np.linalg.inv( VarMatRes ) * presentResidualData
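                ## Under the no-fault hypothesis the residual r(t) = A*y(t) is
                ## zero-mean with covariance VarMatRes, so
                ## gamma = r(t)' * inv(VarMatRes) * r(t) follows a chi-squared
                ## distribution with r degrees of freedom (r = rows of A).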
## Residue is above threshold level
                if gamma >= chi2ppf[0][r]:
                    tempL = []
                    tempB = []
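                    ## Generalized likelihood ratio isolation of a single
                    ## faulty SPND: a bias b in SPND k shifts the residual mean
                    ## by Fspndk*b, where Fspndk = A*Espndk is the k-th fault
                    ## signature. The ML bias estimate and log-likelihood
                    ## ratio are
                    ##     bspndk = inv(Fspndk'*inv(V)*Fspndk) * Fspndk'*inv(V)*r
                    ##     lspndk = r'*inv(V)*Fspndk * bspndk     (V = VarMatRes)
                    ## and the SPND maximizing lspndk is the fault candidate.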
                    for spndk in range(len(clusterlist[clusterj])):
                        Espndk = np.zeros(shape=(len(clusterlist[clusterj]), 1))
                        Espndk[spndk][0] = 1
                        Fspndk = A * Espndk
                        factor = Fspndk.transpose() * np.linalg.inv(VarMatRes)
                        m1 = factor * Fspndk
                        m2 = factor * presentResidualData
                        bspndk = np.linalg.inv(m1) * m2
                        lspndk = m2.transpose() * bspndk
                        tempL.append(lspndk[0, 0])
                        tempB.append(bspndk[0, 0])
                    Lspndk = tempL[:]
                    Bspndk = tempB[:]
                    LspndkMax = max(Lspndk)
                    indexLspndk = Lspndk.index(LspndkMax)
                    Bspndk[indexLspndk] = Bspndk[indexLspndk] * math.sqrt(combinedVar[clusterlist[clusterj][indexLspndk]])
print "\nChi-squared test violated in cluster",clusterj,"for",ti,"th time instance"
#logger.debug(clusterj)
print "gamma : ",gamma[0,0]
print "chisquare: ",chi2ppf[0][r]
print "Log likelihood ratio maximizer (perhaps nonunique) SPND is",clusterlist[clusterj][indexLspndk]
print "Estimated gross-error of the SPND : ",Bspndk[indexLspndk]
#print "standard deviation : ",math.sqrt(combinedVar[clusterlist[clusterj][indexLspndk]])
#print "mean : ",combinedMean[clusterlist[clusterj][indexLspndk]]
print "Raw data of tha SPND : ",rawDataMatCoV[ti][clusterlist[clusterj][indexLspndk]]
logger.debug(str(strftime("%Y-%m-%d %H:%M:%S"))+','+str(ti)+','+str(clusterlist[clusterj][indexLspndk]) +','+ str(clusterj) +','+'chisquare test violated' +','+ str(rawDataMatCoV[ti][clusterlist[clusterj][indexLspndk]])+','+ str(round(Bspndk[indexLspndk],4)) + ','+ str(round(gamma[0,0],4)) +','+ str(round(chi2ppf[0][r],4)))
                    ##..check whether the maximum of Lspndk is attained by more than one SPND..##
                    for i in range(len(Lspndk)):
                        if i != indexLspndk:
                            if Lspndk[i] == LspndkMax:
                                print "Other log likelihood ratio maximizer SPND is/are: ", clusterlist[clusterj][i]
                                logger.debug(str(strftime("%Y-%m-%d %H:%M:%S")) + ',' + str(ti) + ',' + 'other Log likelihood ratio maximizer SPND is/are: ' + ',' + str(clusterlist[clusterj][i]))
spndFaultCount[0][clusterlist[clusterj][indexLspndk]] += 1
clusterFaultCount[0][clusterj] += 1
windowMat[windowWidth-1][clusterlist[clusterj][indexLspndk]] = 1
#print "windowMat at",ti, "\n",windowMat[windowWidth-1,:]
#print "previousCo\n",previousCo
#print "previousFilteredCo\n",previousFilteredCo
        ##...check for multiple faults in the present window...##
        for spndk in range(cCoV):
            if np.sum(windowMat[:, spndk]) / windowWidth > faultThreshold:
                for cluster in range(len(clusterlist)):
                    for spnd in range(len(clusterlist[cluster])):
                        if clusterlist[cluster][spnd] == spndk:
                            print "Faulty SPND ", spndk, "cluster", cluster, "at ti", ti
                            fo1.write(str(strftime("%Y-%m-%d %H:%M:%S")) + ',' + str(ti) + ',' + str(spndk) + ',' + str(cluster) + ',' + 'Faulty SPND (window test)' + ',' + str(windowWidth) + ',' + str(faultThreshold) + '\n')
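        ##...slide the window up one row; the newest row is re-populated...##
        ##...during the next time instance...##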
        for width in range(windowWidth - 1):
            windowMat[width] = windowMat[width + 1]
        windowMat[windowWidth - 1] = np.zeros(cCoV)
print "spndFaultCount\n",spndFaultCount[0]
maxFault = np.max(spndFaultCount[0])
fo.write ('spndFaultCount' + ',' +'\n' + str(spndFaultCount[0]) + '\n');
    plt.figure()
    index1 = range(len(spndFaultCount[0]))
    plt.bar(index1, spndFaultCount[0])
    plt.xlabel('SPND')
    plt.ylabel('Fault Count')
    plt.title('SPND Fault Count')
    plt.savefig('spndFaultCount.pdf')
    plt.figure()
    index2 = range(cCo)
    coFaultCount = spndFaultCount[:, :cCo]
    plt.bar(index2, coFaultCount[0])
    plt.xlabel('Co SPND')
    plt.ylabel('Fault Count')
    plt.title('Co SPND Fault Count [0 to ' + str(cCo - 1) + ']')
    plt.ylim([0, maxFault * 1.05])
    plt.savefig('cobaltFaultCount.pdf')
    plt.figure()
    index3 = range(cCo, cCo + (cCoV - cCo) / 2)
    vFaultCount1 = spndFaultCount[:, cCo:cCo + (cCoV - cCo) / 2]
    plt.bar(index3, vFaultCount1[0])
    plt.xlabel('V SPND')
    plt.ylabel('Fault Count')
    plt.title('V SPND Fault Count [' + str(cCo) + ' to ' + str(cCo + (cCoV - cCo) / 2 - 1) + ']')
    plt.ylim([0, maxFault * 1.05])
    plt.savefig('vanadiumFaultCount1.pdf')
    plt.figure()
    index4 = range(cCo + (cCoV - cCo) / 2, cCoV)
    vFaultCount2 = spndFaultCount[:, cCo + (cCoV - cCo) / 2:cCoV]
    plt.bar(index4, vFaultCount2[0])
    plt.xlabel('V SPND')
    plt.ylabel('Fault Count')
    plt.title('V SPND Fault Count [' + str(cCo + (cCoV - cCo) / 2) + ' to ' + str(cCoV - 1) + ']')
    plt.ylim([0, maxFault * 1.05])
    plt.savefig('vanadiumFaultCount2.pdf')
    expectedLimit = (eidx - sidx + 1) * alpha
    print "clusterFaultCount\n", clusterFaultCount[0]
    fo.write('clusterFaultCount' + ',' + '\n' + str(clusterFaultCount[0]) + '\n')
    plt.figure()
    index5 = range(len(clusterFaultCount[0]))
    plt.bar(index5, clusterFaultCount[0])
    plt.axhline(y=expectedLimit, xmin=0, xmax=len(clusterFaultCount[0]), linewidth=2, linestyle='--', color='r')
    plt.xlabel('CLUSTER')
    plt.ylabel('Fault Count')
    plt.title('Cluster Fault Count')
    plt.savefig('clusterFaultCount.pdf')
    p = np.sum(clusterFaultCount[0])
    print "Total number of instances of fault occurrence in all clusters:", p
    fo.write('Total number of instances of fault occurrence in all clusters' + ': ' + str(p) + '\n')
    frequencyFaultCluster = clusterFaultCount[0] / (eidx - sidx + 1)
    print "frequencyFaultCluster\n", frequencyFaultCluster
    fo.write('frequencyFaultCluster' + ',' + '\n' + str(frequencyFaultCluster) + '\n')
    plt.figure()
    index6 = range(len(frequencyFaultCluster))
    plt.bar(index6, frequencyFaultCluster)
    plt.axhline(y=alpha, xmin=0, xmax=len(clusterFaultCount[0]), linewidth=2, linestyle='--', color='r')
    plt.xlabel('CLUSTER')
    plt.ylabel('Frequency of fault')
    plt.title('Frequency of Cluster Fault')
    plt.savefig('FreqClusterFaultCount.pdf')
if __name__ == '__main__':
errorCheck()