Handwritten 2

This digits dataset ships with scikits.learn and is much easier than the previous one.

 1 import time
 2 
 3 import numpy
 4 import libopf_py
 5 
 6 from scikits.learn import datasets, svm, metrics
 7 
 8 digits = datasets.load_digits()
 9 
# To apply a classifier to this data, each image has to be flattened so that
# the data becomes a (samples, features) matrix:
n_samples = digits.images.shape[0]
data = digits.images.reshape(n_samples, -1)
14 
def _benchmark(name, classifier, train_x, train_y, test_x, test_y):
  """Fit `classifier`, predict the test set, and print timings and metrics.

  name       -- tag used in the banner and in every printed line
  classifier -- object exposing fit(samples, labels) and predict(samples)
  train_x, train_y -- training samples and their labels
  test_x, test_y   -- testing samples and their true labels

  Prints the wall-clock time (time.time()) spent in fit and in predict,
  followed by the classification report and the confusion matrix of the
  predictions against `test_y`. Nothing is returned.
  """
  print("%s %s %s" % ("-"*20, name, "-"*20))

  start = time.time()
  classifier.fit(train_x, train_y)
  print("%s: time elapsed in fitting: %f secs" % (name, time.time()-start))

  start = time.time()
  predicted = classifier.predict(test_x)
  print("%s: time elapsed in predicting: %f secs" % (name, time.time()-start))

  report = metrics.classification_report(test_y, predicted)
  print("Classification report for %s:\n%s\n" % (name, report))

  # Wrapped in a one-element tuple so the whole matrix is formatted as the
  # single %s argument.
  confusion = metrics.confusion_matrix(test_y, predicted)
  print("Confusion matrix:\n%s" % (confusion,))


def run(split):
  """Compare OPF and SVM on one random train/test split of the digits data.

  split -- fraction of the samples used for training: after shuffling, the
           first int(split*n_samples) samples train the classifiers and the
           remaining ones are used for testing.

  Uses the module-level `n_samples`, `data` and `digits`. All results are
  printed; nothing is returned. The shuffle is not seeded here, so the split
  differs from call to call.
  """
  n_split = int(split*n_samples)

  print("")
  print("="*100)
  print("")

  print("Split: %3.2f" % split)
  print("Size: %d, Classifying Size: %d, Testing Size: %d" % (n_samples, n_split, n_samples-n_split))

  # Index samples and labels with the same permutation so they stay paired.
  rand = numpy.random.permutation(n_samples)
  random_data  = data[rand]
  random_label = digits.target[rand]

  data_train,  data_test  = random_data [:n_split], random_data [n_split:]
  label_train, label_test = random_label[:n_split], random_label[n_split:]

  # OPF only supports 32-bit datatypes at the moment
  _benchmark("OPF", libopf_py.OPF(),
             data_train.astype(numpy.float32),
             label_train.astype(numpy.int32),
             data_test.astype(numpy.float32),
             label_test.astype(numpy.int32))

  _benchmark("SVM", svm.SVC(), data_train, label_train, data_test, label_test)
74 
# Repeat the benchmark, each time training on a larger share of the data.
for split in (0.1, 0.2, 0.4, 0.6, 0.8):
  run(split)
====================================================================================================

Split: 0.10
Size: 1797, Classifying Size: 179, Testing Size: 1618
-------------------- OPF --------------------
OPF: time elapsed in fitting: 0.003396 secs
OPF: time elapsed in predicting: 0.016637 secs
Classification report for OPF:
             precision    recall  f1-score   support

          0       0.99      0.99      0.99       156
          1       0.90      0.93      0.91       158
          2       0.97      0.98      0.98       159
          3       0.90      0.92      0.91       165
          4       0.99      0.97      0.98       163
          5       0.97      0.97      0.97       164
          6       0.99      0.99      0.99       170
          7       0.95      0.98      0.97       157
          8       0.92      0.78      0.84       161
          9       0.85      0.92      0.88       165

avg / total       0.94      0.94      0.94      1618


Confusion matrix:
[[155   0   0   0   1   0   0   0   0   0]
 [  0 147   0   0   0   0   0   1   4   6]
 [  0   3 156   0   0   0   0   0   0   0]
 [  1   0   0 151   0   1   0   4   4   4]
 [  0   2   0   0 158   0   0   1   0   2]
 [  0   0   0   0   1 159   1   0   0   3]
 [  0   0   0   0   0   0 169   0   1   0]
 [  0   0   0   0   0   0   0 154   2   1]
 [  0  10   4   8   0   1   1   2 125  10]
 [  1   2   0   8   0   3   0   0   0 151]]
-------------------- SVM --------------------
SVM: time elapsed in fitting: 0.013867 secs
SVM: time elapsed in predicting: 0.047028 secs
Classification report for SVM:
             precision    recall  f1-score   support

          0       1.00      0.76      0.87       156
          1       0.17      1.00      0.29       158
          2       1.00      0.68      0.81       159
          3       1.00      0.50      0.67       165
          4       1.00      0.57      0.73       163
          5       1.00      0.64      0.78       164
          6       1.00      0.28      0.44       170
          7       1.00      0.72      0.84       157
          8       1.00      0.02      0.05       161
          9       1.00      0.18      0.30       165

avg / total       0.92      0.53      0.58      1618


Confusion matrix:
[[119  37   0   0   0   0   0   0   0   0]
 [  0 158   0   0   0   0   0   0   0   0]
 [  0  51 108   0   0   0   0   0   0   0]
 [  0  82   0  83   0   0   0   0   0   0]
 [  0  70   0   0  93   0   0   0   0   0]
 [  0  59   0   0   0 105   0   0   0   0]
 [  0 122   0   0   0   0  48   0   0   0]
 [  0  44   0   0   0   0   0 113   0   0]
 [  0 157   0   0   0   0   0   0   4   0]
 [  0 136   0   0   0   0   0   0   0  29]]

====================================================================================================

Split: 0.20
Size: 1797, Classifying Size: 359, Testing Size: 1438
-------------------- OPF --------------------
OPF: time elapsed in fitting: 0.013214 secs
OPF: time elapsed in predicting: 0.027107 secs
Classification report for OPF:
             precision    recall  f1-score   support

          0       0.99      0.99      0.99       142
          1       0.90      0.99      0.94       143
          2       0.99      0.99      0.99       142
          3       0.95      0.96      0.96       137
          4       0.97      1.00      0.99       143
          5       0.98      0.97      0.98       147
          6       0.99      0.98      0.98       155
          7       0.97      0.99      0.98       142
          8       0.95      0.88      0.91       144
          9       0.97      0.91      0.94       143

avg / total       0.97      0.97      0.97      1438


Confusion matrix:
[[141   0   0   0   1   0   0   0   0   0]
 [  0 141   0   0   2   0   0   0   0   0]
 [  0   1 140   1   0   0   0   0   0   0]
 [  0   0   0 132   0   0   0   1   3   1]
 [  0   0   0   0 143   0   0   0   0   0]
 [  0   0   0   1   0 143   1   0   0   2]
 [  1   2   0   0   0   0 152   0   0   0]
 [  0   0   0   0   0   0   0 141   0   1]
 [  0  12   1   3   0   0   1   1 126   0]
 [  0   1   0   2   1   3   0   3   3 130]]
-------------------- SVM --------------------
SVM: time elapsed in fitting: 0.048080 secs
SVM: time elapsed in predicting: 0.072515 secs
Classification report for SVM:
             precision    recall  f1-score   support

          0       1.00      0.97      0.99       142
          1       0.92      0.98      0.95       143
          2       0.99      0.99      0.99       142
          3       0.98      0.98      0.98       137
          4       0.93      0.99      0.96       143
          5       0.95      0.98      0.97       147
          6       1.00      0.93      0.96       155
          7       0.97      0.97      0.97       142
          8       0.97      0.90      0.93       144
          9       0.92      0.94      0.93       143

avg / total       0.96      0.96      0.96      1438


Confusion matrix:
[[138   0   0   0   1   0   0   0   0   3]
 [  0 140   0   0   2   1   0   0   0   0]
 [  0   0 141   0   0   0   0   1   0   0]
 [  0   0   0 134   0   0   0   1   1   1]
 [  0   0   0   0 142   0   0   1   0   0]
 [  0   0   0   1   0 144   0   0   0   2]
 [  0   2   0   0   5   3 144   0   1   0]
 [  0   0   0   0   1   0   0 138   0   3]
 [  0   9   1   1   1   0   0   1 129   2]
 [  0   1   0   1   1   3   0   0   2 135]]

====================================================================================================

Split: 0.40
Size: 1797, Classifying Size: 718, Testing Size: 1079
-------------------- OPF --------------------
OPF: time elapsed in fitting: 0.051504 secs
OPF: time elapsed in predicting: 0.041591 secs
Classification report for OPF:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00       109
          1       0.97      0.99      0.98       106
          2       0.99      0.99      0.99       102
          3       0.97      0.99      0.98        99
          4       1.00      0.99      1.00       108
          5       0.98      0.96      0.97       112
          6       0.98      0.98      0.98       114
          7       0.97      1.00      0.99       106
          8       0.98      0.95      0.96       116
          9       0.96      0.96      0.96       107

avg / total       0.98      0.98      0.98      1079


Confusion matrix:
[[109   0   0   0   0   0   0   0   0   0]
 [  0 105   0   0   0   0   1   0   0   0]
 [  0   0 101   0   0   0   0   1   0   0]
 [  0   0   0  98   0   0   0   0   0   1]
 [  0   0   0   0 107   0   0   1   0   0]
 [  0   0   0   1   0 108   1   0   0   2]
 [  0   1   0   0   0   0 112   0   1   0]
 [  0   0   0   0   0   0   0 106   0   0]
 [  0   2   1   2   0   0   0   0 110   1]
 [  0   0   0   0   0   2   0   1   1 103]]
-------------------- SVM --------------------
SVM: time elapsed in fitting: 0.101640 secs
SVM: time elapsed in predicting: 0.078259 secs
Classification report for SVM:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00       109
          1       0.99      1.00      1.00       106
          2       1.00      0.99      1.00       102
          3       1.00      1.00      1.00        99
          4       0.99      1.00      1.00       108
          5       1.00      0.96      0.98       112
          6       0.99      0.99      0.99       114
          7       0.99      0.99      0.99       106
          8       0.99      0.98      0.99       116
          9       0.96      1.00      0.98       107

avg / total       0.99      0.99      0.99      1079


Confusion matrix:
[[109   0   0   0   0   0   0   0   0   0]
 [  0 106   0   0   0   0   0   0   0   0]
 [  0   0 101   0   0   0   0   1   0   0]
 [  0   0   0  99   0   0   0   0   0   0]
 [  0   0   0   0 108   0   0   0   0   0]
 [  0   0   0   0   1 108   1   0   0   2]
 [  0   0   0   0   0   0 113   0   1   0]
 [  0   0   0   0   0   0   0 105   0   1]
 [  0   1   0   0   0   0   0   0 114   1]
 [  0   0   0   0   0   0   0   0   0 107]]

====================================================================================================

Split: 0.60
Size: 1797, Classifying Size: 1078, Testing Size: 719
-------------------- OPF --------------------
OPF: time elapsed in fitting: 0.115626 secs
OPF: time elapsed in predicting: 0.038860 secs
Classification report for OPF:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        75
          1       0.92      1.00      0.96        69
          2       1.00      1.00      1.00        72
          3       0.99      0.96      0.97        70
          4       1.00      0.99      0.99        67
          5       1.00      1.00      1.00        69
          6       1.00      0.99      0.99        75
          7       0.98      0.99      0.98        81
          8       0.97      0.93      0.95        68
          9       0.97      0.97      0.97        73

avg / total       0.98      0.98      0.98       719


Confusion matrix:
[[75  0  0  0  0  0  0  0  0  0]
 [ 0 69  0  0  0  0  0  0  0  0]
 [ 0  0 72  0  0  0  0  0  0  0]
 [ 0  0  0 67  0  0  0  2  1  0]
 [ 0  1  0  0 66  0  0  0  0  0]
 [ 0  0  0  0  0 69  0  0  0  0]
 [ 0  1  0  0  0  0 74  0  0  0]
 [ 0  0  0  0  0  0  0 80  0  1]
 [ 0  4  0  0  0  0  0  0 63  1]
 [ 0  0  0  1  0  0  0  0  1 71]]
-------------------- SVM --------------------
SVM: time elapsed in fitting: 0.122449 secs
SVM: time elapsed in predicting: 0.064791 secs
Classification report for SVM:
             precision    recall  f1-score   support

          0       1.00      0.99      0.99        75
          1       0.97      1.00      0.99        69
          2       1.00      1.00      1.00        72
          3       1.00      0.99      0.99        70
          4       0.99      1.00      0.99        67
          5       1.00      1.00      1.00        69
          6       1.00      1.00      1.00        75
          7       0.99      0.99      0.99        81
          8       1.00      0.94      0.97        68
          9       0.96      1.00      0.98        73

avg / total       0.99      0.99      0.99       719


Confusion matrix:
[[74  0  0  0  1  0  0  0  0  0]
 [ 0 69  0  0  0  0  0  0  0  0]
 [ 0  0 72  0  0  0  0  0  0  0]
 [ 0  0  0 69  0  0  0  1  0  0]
 [ 0  0  0  0 67  0  0  0  0  0]
 [ 0  0  0  0  0 69  0  0  0  0]
 [ 0  0  0  0  0  0 75  0  0  0]
 [ 0  0  0  0  0  0  0 80  0  1]
 [ 0  2  0  0  0  0  0  0 64  2]
 [ 0  0  0  0  0  0  0  0  0 73]]

====================================================================================================

Split: 0.80
Size: 1797, Classifying Size: 1437, Testing Size: 360
-------------------- OPF --------------------
OPF: time elapsed in fitting: 0.204330 secs
OPF: time elapsed in predicting: 0.024736 secs
Classification report for OPF:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        39
          1       1.00      0.95      0.98        22
          2       1.00      1.00      1.00        31
          3       1.00      1.00      1.00        44
          4       1.00      1.00      1.00        42
          5       0.95      0.97      0.96        36
          6       0.97      1.00      0.99        34
          7       1.00      0.98      0.99        43
          8       1.00      1.00      1.00        30
          9       0.97      0.97      0.97        39

avg / total       0.99      0.99      0.99       360


Confusion matrix:
[[39  0  0  0  0  0  0  0  0  0]
 [ 0 21  0  0  0  1  0  0  0  0]
 [ 0  0 31  0  0  0  0  0  0  0]
 [ 0  0  0 44  0  0  0  0  0  0]
 [ 0  0  0  0 42  0  0  0  0  0]
 [ 0  0  0  0  0 35  1  0  0  0]
 [ 0  0  0  0  0  0 34  0  0  0]
 [ 0  0  0  0  0  0  0 42  0  1]
 [ 0  0  0  0  0  0  0  0 30  0]
 [ 0  0  0  0  0  1  0  0  0 38]]
-------------------- SVM --------------------
SVM: time elapsed in fitting: 0.153056 secs
SVM: time elapsed in predicting: 0.036347 secs
Classification report for SVM:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        39
          1       1.00      1.00      1.00        22
          2       1.00      1.00      1.00        31
          3       1.00      1.00      1.00        44
          4       1.00      0.98      0.99        42
          5       1.00      0.97      0.99        36
          6       0.97      1.00      0.99        34
          7       1.00      0.98      0.99        43
          8       1.00      1.00      1.00        30
          9       0.95      1.00      0.97        39

avg / total       0.99      0.99      0.99       360


Confusion matrix:
[[39  0  0  0  0  0  0  0  0  0]
 [ 0 22  0  0  0  0  0  0  0  0]
 [ 0  0 31  0  0  0  0  0  0  0]
 [ 0  0  0 44  0  0  0  0  0  0]
 [ 0  0  0  0 41  0  0  0  0  1]
 [ 0  0  0  0  0 35  1  0  0  0]
 [ 0  0  0  0  0  0 34  0  0  0]
 [ 0  0  0  0  0  0  0 42  0  1]
 [ 0  0  0  0  0  0  0  0 30  0]
 [ 0  0  0  0  0  0  0  0  0 39]]