Generate an Adjacency matrix based on input data

This lesson shows two examples of input data types that can be used for classification. The code below reads the data, calculates the distance between each pair of samples, and then generates the adjacency matrix that represents the input data. This adjacency matrix is a graph representation of the data and contains the weight of each connection (arc) between two samples.

1. How to generate an adjacency matrix from a scikits.learn database

 1 from numpy import *
 2 from iaOPF import *
 3 
 4 from scikits.learn import datasets
 5 from scipy.spatial import distance
 6 
 7 def load_iris():
 8     iris = datasets.load_iris()
 9     labels = iris.target
10     feats = iris['data']
11     return feats, labels
12 
13 def euclidian(X, Y):
14     E = X - Y
15     return sqrt(dot(E, E))
16 
17 feats, labels = load_iris()
18 data = array(feats)
19 
20 nsamples =5;
21 data = random.permutation(data)
22 data = data[0:nsamples,:] # took 5 samples only for visualization puspose
23 
24 
25 A = distance.squareform(distance.pdist(data,euclidian))
26 
27 mmgraphviz(iaadjmxtcreate(A,dist=True), title='Complete graph from IRIS dataset')
28 print "Adjacency Matrix = "
29 print A # visualize the adjacency matrix
Adjacency Matrix = 
[[ 0.          3.87556448  3.29089653  0.88317609  3.02324329]
 [ 3.87556448  0.          1.62788206  4.12067956  1.42828569]
 [ 3.29089653  1.62788206  0.          3.37786915  0.47958315]
 [ 0.88317609  4.12067956  3.37786915  0.          3.10805405]
 [ 3.02324329  1.42828569  0.47958315  3.10805405  0.        ]]
/media/_xsb/iaOPF/lesson_readdata/GRVIZ89748_001.png

Complete graph from IRIS dataset

2. How to generate an adjacency matrix from an attachment file

 1 from numpy import *
 2 from scikits.learn import *
 3 
 4 f = open(find_attachment_file('EM_peri.txt'))
 5 v = f.read()
 6 data = []
 7 for word in v.split():
 8     data.append(float(word))
 9 
10 data = array(data)
11 data = data.reshape((59,61))
12 nsamples =5
13 data = data[0:nsamples,:] # took 5 samples only for visualization puspose
14 A = distance.squareform(distance.pdist(data,euclidian))
15 
16 mmgraphviz(iaadjmxtcreate(A,dist=True), title='Complete graph from attachment file dataset')
17 print "Adjacency Matrix = "
18 print A
Adjacency Matrix = 
[[    0.          2088.36734259  1143.44127434  1266.62056689
    749.39116527]
 [ 2088.36734259     0.           967.46859947   886.1689149   1416.90500412]
 [ 1143.44127434   967.46859947     0.           177.54562257   464.8758731 ]
 [ 1266.62056689   886.1689149    177.54562257     0.           561.48193361]
 [  749.39116527  1416.90500412   464.8758731    561.48193361     0.        ]]
/media/_xsb/iaOPF/lesson_readdata/GRVIZ89748_002.png

Complete graph from attachment file dataset