Function iaplotDecisionBoundaries

Synopsis

This function returns an image with a plot of the decision boundary found by OPF, SVM, k-NN or a decision tree in a two-class, two-dimensional classification problem.

  • img = iaplotDecisionBoundaries(X,Y,h,method = 'kNN',k = 1, ker = 'rbf')

    • Output

      • img: Image that shows the method decision boundary and its training samples in a figure.
    • Input

      • X: Training samples;
      • Y: Labels of the training samples;
      • h: Step of the evaluation grid (resolution). Smaller values give a finer boundary at a higher computational cost (e.g. 0.001);
      • method: 'kNN', 'SVM', 'OPF' or 'DT' (decision tree);
      • k: If method='kNN', k is the (integer) number of neighbors. If method='SVM', k is the regularization parameter C of the SVM. It is ignored by the other methods;
      • ker: If method ='SVM' then ker corresponds to SVM's kernel.

Description

This function returns an image with a plot of the decision boundary found by OPF, SVM, k-NN or a decision tree in a two-class, two-dimensional classification problem. It is a brute-force method: the selected classifier is evaluated at every point of a regular grid with step h that covers the bounding box [x_min, x_max] × [y_min, y_max] of the training samples.

Function Code

 1 from numpy import*
 2 from courseIA368Q1S2012.rob_biblio2 import*
 3 from sklearn import neighbors, datasets
 4 from iaOPF import iafit, iapredict
 5 from courseIA368Q1S2012.and_lib_0 import figureToArray
 6 import ia636, ia870
 7 from math import ceil,floor
 8 from sklearn import svm, tree
 9 
10 def iaplotDecisionBoundaries(X,Y,h,method = 'kNN',k=1,ker = 'rbf'):
11 
12    # point in the mesh [x_min, m_max]x[y_min, y_max].
13    x_min, x_max = floor(X[:, 0].min()- 0.1), ceil(X[:, 0].max()+0.1)
14    y_min, y_max = floor(X[:, 1].min()- 0.1), ceil(X[:, 1].max()+0.1)
15    xx, yy = meshgrid(arange(x_min, x_max, h),
16                      arange(y_min, y_max, h))
17 
18 
19    opf_pc, _, opf_labels = iafit(X,Y,euclidian)
20 
21    if (method == 'kNN'):
22       clf = neighbors.KNeighborsClassifier(k,weights='distance')
23       clf.fit(X,Y)
24       Z = clf.predict(c_[xx.ravel(), yy.ravel()])
25 
26    if (method == 'OPF'):
27       Z = iapredict(c_[xx.ravel(), yy.ravel()], X, opf_pc, opf_labels, euclidian)
28 
29    if (method == 'SVM'):
30       trainer =  svm.SVC(kernel=ker,C = k)
31       trainer.fit(X,Y)
32       Z = trainer.predict(c_[xx.ravel(), yy.ravel()])
33 
34    if (method == 'DT'):
35       clf = tree.DecisionTreeClassifier()
36       clf = clf.fit(X, Y)
37       Z = clf.predict(c_[xx.ravel(), yy.ravel()])
38 
39    # Put the result into a color plot
40    Z = Z.reshape(xx.shape)
41 
42 
43    Ap =  zeros(shape(Z)).astype(bool)
44    Bp =  Ap.copy()
45    Anp = Ap.copy()
46    Bnp = Ap.copy()
47 
48    prot_A = X[(opf_pc == 0) & (opf_labels==1),:].tolist()
49    prot_B = X[(opf_pc == 0) & (opf_labels!=1),:].tolist()
50    samp_A = X[(opf_pc != 0) & (opf_labels==1),:].tolist()
51    samp_B = X[(opf_pc != 0) & (opf_labels!=1),:].tolist()
52 
53 
54    for samp in prot_A:
55       Ap[(samp[1]-y_min)/h -1,(samp[0]-x_min)/h-1 ] = 1
56 
57    for samp in prot_B:
58       Bp[(samp[1]-y_min)/h -1,(samp[0]-x_min)/h-1] = 1
59 
60    for samp in samp_A:
61       Anp[(samp[1]-y_min)/h -1,(samp[0]-x_min)/h-1 ] = 1
62 
63    for samp in samp_B:
64       Bnp[(samp[1]-y_min)/h -1,(samp[0]-x_min)/h-1] = 1
65 
66 
67    imgA =  ia870.iaunion(ia870.iadil(Ap,ia870.iasedisk(3)),ia870.iadil(Anp,ia870.iasebox(3)))
68    imgB = ia870.iaunion(ia870.iadil(Bp,ia870.iasedisk(3)),ia870.iadil(Bnp,ia870.iasebox(3)))
69    background_image = (ones(shape(Z))*125).astype(uint8)
70    boundary = ia870.iagradm(Z.astype(bool))
71    img = ia636.iagshow(background_image,Z==1,Z!=1,boundary,imgA,imgB)
72 
73 
74    return  img

Examples

This example shows the boundaries found for a synthetic dataset named "Spirals".

Example 1:

 1 import scipy.io
 2 import random
 3 from courseIA368Q1S2012.rob_biblio2 import setTrainingTestSets, euclidian
 4 from iaplotdecisionboundaries import iaplotDecisionBoundaries
 5 
 6 
 7 spirals_data = scipy.io.loadmat(find_attachment_file('courseIA368Q1S2012/rob_proj_teste_spirals/spirals.mat'))
 8 spirals_data = spirals_data['spirals']
 9 feats = spirals_data[:,:-1]
10 labels = spirals_data[:,-1]
11 labels[labels==2]=0
12 
13 random.seed(0)
14 feats_train,labels_train,_ ,_ =   setTrainingTestSets(feats,labels,0.3)
15 h = 0.05
16 img1NN = iaplotDecisionBoundaries(feats_train,labels_train, h, method = 'kNN',k=1)
17 adshow(img1NN, '1-NN Decision Boundary')
18 imgOPF = iaplotDecisionBoundaries(feats_train,labels_train, h, method = 'OPF')
19 adshow(imgOPF, 'OPF Decision Boundary')
20 imgSVM = iaplotDecisionBoundaries(feats_train,labels_train, h, method = 'SVM',k=0.0001,ker = 'rbf')
21 adshow(imgSVM, 'SVM Decision Boundary')
22 imgDT = iaplotDecisionBoundaries(feats_train,labels_train, h, method = 'DT')
23 adshow(imgDT, 'DT Decision Boundary')
/usr/lib/python2.6/dist-packages/scipy/io/matlab/mio.py:84: FutureWarning: Using struct_as_record default value (False) This will change to True in future versions
  return MatFile5Reader(byte_stream, **kwargs)
/usr/local/lib/python2.6/dist-packages/sklearn/svm/classes.py:184: FutureWarning: SVM: scale_C will be True by default in scikit-learn 0.11
  cache_size, scale_C)

1-NN Decision Boundary

OPF Decision Boundary

SVM Decision Boundary

DT Decision Boundary