
Investigation of Bayesian networks for classification problems involving binary data


D:\CD\Noiseless Data\nb_data_run.m

function nb_data_run()
% Used to do inference testing for the NB data structure
% This is the corrected code

fid=fopen('NB Data Structure Full Summary.txt','w');
fprintf(fid,'This is the Full details for NB Data Structure with varying Data Size.');
fprintf(fid,'\n----------------------------------------------------------------------\n');
fclose(fid);

fid1=fopen('NB Data Structure Dataset.txt','w');
fclose(fid1);

fid2=fopen('NB Data Structure Summarised Results.csv','w');
fprintf(fid2,'Nodes,RunNum,TrainNum,ActualCorrect,ActualNegllhood,ActualUnknown,NBCorrect,NBNegllhood,NBUnknown,NBTime,TANBCorrect,TANBNegllhood,TANBUnknown,TANBLearntime,TANBInferTime,TANBtime,BNCorrect,BNNegllhood,BNUnknown,BNLearnTime,BNinferTime,BNtime');
fclose(fid2);
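% The nested loops below repeat the experiment for each network size and run,
% and for each entry of train_counter_table; full details go to fid/fid1 and
% one comma-separated summary row per configuration goes to fid2.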


train_counter_table = [100;500;1000;5000;10000];

for num_of_nodes = 80:20:100
for run = 1:1:20

fid=fopen('NB Data Structure Full Summary.txt','a');
fid1=fopen('NB Data Structure Dataset.txt','a');
fid2=fopen('NB Data Structure Summarised Results.csv','a');

% *****************************************
% Structure
% *****************************************

N = num_of_nodes + 1;
dag = zeros(N,N);
dag(N,1:(N-1))=1; % Root Node to all nodes

% *****************************************
% Creating Network
% *****************************************
observed_node_count = 0;
observed_nodes = zeros(1,N-1);
for observed_col = 1:1:(N-1)
observed_node_count = observed_node_count+1;
observed_nodes(1,observed_col) = observed_node_count;
end

false = 1; true = 2;
ns = 2*ones(1,N); % binary nodes

bnet = mk_bnet(dag,ns,'observed',observed_nodes);

% *****************************************
% Inputting Parameters
% *****************************************

rand('state',sum(100*clock));
for nodule = 1:1:(N-1)
bnet.CPD{nodule} = tabular_CPD(bnet, nodule);
end

bnet.CPD{N} = tabular_CPD(bnet, N, [0.5 0.5]);

% *****************************************************
% To Display the Actual Parameters
% *****************************************************
fprintf(fid,'\nDisplaying Actual Parameters for Actual Network.\n');
CPT3 = cell(1,N);
for i=1:N
s=struct(bnet.CPD{i}); % violate object privacy
CPT3{i}=s.CPT;
fprintf(fid,'\nDisplaying Actual Parameters for node %2.0d',i);
fprintf(fid,'\n');
display_CPT(fid,CPT3{i})
end
fprintf(fid,'\n');

for train_counter_row = 1:1:5
train_counter = train_counter_table(train_counter_row,1);

fprintf(fid,'\nNumber of Nodes %3.0f\n',num_of_nodes);
fprintf(fid,'\nRun Number %3.0f\n',run);
fprintf(fid,'\n%3.0f training examples\n',train_counter);
fprintf(fid1,'\nNumber of Nodes %3.0f\n',num_of_nodes);
fprintf(fid1,'\nRun Number %3.0f\n',run);
fprintf(fid1,'\n%3.0f training examples\n',train_counter);
fprintf(fid2,'\n%3.0f,%3.0f,%3.0f,',num_of_nodes,run,train_counter);
% *****************************************************
% To Generate Training Cases
% *****************************************************
seed = 0;
rand('state', seed);
randn('state', seed);
ncases_train = train_counter;
ncases_test = 10000;
initial_gen_data = zeros(N, ncases_train);
for m=1:ncases_train
initial_gen_data(:,m) = cell2num(sample_bnet(bnet));
end
train_data = initial_gen_data(1:(N-1),:)';
train_names = initial_gen_data(N,:)';
save('art_train_data.txt','train_data','-ASCII','-tabs');
save('art_train_names.txt','train_names','-ASCII','-tabs');
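% sample_bnet returns one sampled instantiation of all N nodes as a cell array
% (hence cell2num); cases are generated column by column and then transposed so
% that rows are cases, with the class node stored separately in train_names.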
nodes_string = int2str(num_of_nodes);
run_string = int2str(run);
train_string = int2str(train_counter);


train_filename = strcat(nodes_string,'_nodes_',run_string,'_run_',train_string,'_train_num_TRAIN.txt');
temp = initial_gen_data';
save(train_filename,'temp','-ASCII','-tabs');
clear temp;

% *****************************************************
% To Generate Test Cases
% *****************************************************

initial_gen_tdata = zeros(N, ncases_test);


for m=1:ncases_test
initial_gen_tdata(:,m) = cell2num(sample_bnet(bnet));
end
fprintf(fid1,'\nTest Data Run %3.0f with %3.0f Training Examples\n',run,train_counter);

test_data = initial_gen_tdata(1:(N-1),:)';
test_names = initial_gen_tdata(N,:)';
save('art_test_data.txt','test_data','-ASCII','-tabs');
save('art_test_names.txt','test_names','-ASCII','-tabs');

test_filename = strcat(nodes_string,'_nodes_',run_string,'_run_',train_string,'_train_num_TEST.txt');
temp = initial_gen_tdata';
save(test_filename,'temp','-ASCII','-tabs');
clear temp;

% *****************************************************************************************************************************
% Calculating Accuracy of Actual Prediction or Bayes Error
% *****************************************************************************************************************************

xt=load('art_test_data.txt');
yt=load('art_test_names.txt');

engine = jtree_ndx_inf_engine(bnet);
evidence = cell(1,N); % for inference, the instances should be fed in one at a time
class = zeros(size(xt,1),3);
llhood = zeros(size(xt,1),1);
correct = 0;
unknown = 0;

% Inference on Test
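% Because inference here uses the true generating network and its true CPTs,
% the posterior over the class node is the Bayes-optimal prediction, so the
% accuracy measured in this section estimates the Bayes error for the sampled data.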

for row= 1:1:size(xt,1)
for col= 1:1:(N-1)
evidence{1,col}=xt(row,col);
end
engine = enter_evidence(engine,evidence');
m = marginal_nodes(engine,N);

% to output the marginals calculated into classes
class(row,1) = m.T(1);
class(row,2) = m.T(2);
if class(row,1)>class(row,2)
class(row,3) = 1;
llhood(row,1) = class(row,1);
elseif class(row,1)<class(row,2)
class(row,3) = 2;
llhood(row,1) = class(row,2);
else
class(row,3) = -1;
llhood(row,1) = 1;
end
end

% *****************************************************
% To compute the accuracy of classification
% *****************************************************

check = zeros(size(xt,1),1);
for row = 1:1:size(xt,1)
if class(row,3) == yt(row,1)
check(row,1)=1;
else
llhood(row,1)=class(row,yt(row,1));
end
end
correct = size(find(check),1);
temp = class(:,3);
unknown = size(find(temp<-0.5),1);
BN_llhood = sum(-1*log(llhood));
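% class(:,3) holds the predicted label, with -1 marking tied marginals; such
% ties are counted as "unknown". llhood holds the probability assigned to the
% true class (corrected above for misclassified cases), so BN_llhood is the
% negative log likelihood of the test labels under the model.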
fprintf(fid,'\n%3.0f correct for Actual Structure.\n',correct);
fprintf(fid,'\n%3.3f is the Actual Negative Log Likelihood Score.\n',BN_llhood);
fprintf(fid,'\n%3.0f unknown for Actual Structure.\n',unknown);
fprintf(fid2,'%3.0f,%3.3f,%3.0f,',correct,BN_llhood,unknown);
clear xt yt engine evidence correct unknown BN_llhood temp;

% *****************************************************************************************************************************
% Calculating Accuracy of Naive Bayes Prediction
% *****************************************************************************************************************************

x=load('art_train_data.txt');
y=load('art_train_names.txt');
xt=load('art_test_data.txt');
yt=load('art_test_names.txt');

num_var = size(x,2);
length = size(x,1);

bnet_test = mk_bnet(dag,ns,'observed',observed_nodes);
seed = 0;
rand('state',seed);

for node_test = 1:1:(N)
bnet_test.CPD{node_test} = tabular_CPD(bnet_test, node_test,'prior_type','dirichlet','dirichlet_weight',1,'dirichlet_type','unif');
end

tic;

% Learning Params
x(:,size(x,2)+1)=y(:,1);
bnet_test = learn_params(bnet_test,x');

% *****************************************************
% To Display the Learned Parameters
% *****************************************************

fprintf(fid,'\nDisplaying Learned Parameters using Actual Network Structure.\n');
CPT3 = cell(1,N);
for i=1:N
s=struct(bnet_test.CPD{i}); % violate object privacy
CPT3{i}=s.CPT;
fprintf(fid,'\nDisplaying Learned Parameters for node %2.0d',i);
fprintf(fid,'\n');



display_CPT(fid,CPT3{i})

end
fprintf(fid,'\n');

engine = jtree_ndx_inf_engine(bnet_test);
evidence = cell(1,num_var+1); % for inference, the instances should be fed in one at a time
class = zeros(size(xt,1),3);
llhood = zeros(size(xt,1),1);
correct = 0;
unknown = 0;
% Inference on Test
for row= 1:1:size(xt,1)
for col= 1:1:(num_var)
evidence{1,col}=xt(row,col);
end
engine = enter_evidence(engine,evidence');
m = marginal_nodes(engine,N);
% to output the marginals calculated into classes
class(row,1) = m.T(1);
class(row,2) = m.T(2);
if class(row,1)>class(row,2)
class(row,3) = 1;
llhood(row,1) = class(row,1);
elseif class(row,1)<class(row,2)
class(row,3) = 2;
llhood(row,1) = class(row,2);
else
class(row,3) = -1;
llhood(row,1) = 1;
end

end
% *****************************************************
% To compute the accuracy of classification
% *****************************************************
check = zeros(size(xt,1),1);
for row = 1:1:size(xt,1)
if class(row,3) == yt(row,1)
check(row,1)=1;
else
llhood(row,1)=class(row,yt(row,1));
end
end
correct = size(find(check),1);
temp = class(:,3);
unknown = size(find(temp<-0.5),1);
BN_llhood = sum(-1*log(llhood));
Actual_Time = toc;
fprintf(fid,'\n%3.0f correct for Actual Structure.\n',correct);
fprintf(fid,'\n%3.3f is the Actual Negative Log Likelihood Score.\n',BN_llhood);
fprintf(fid,'\n%3.0f unknown for Actual Structure.\n',unknown);
fprintf(fid,'\n%3.0f time for inference and learning for Actual Structure.\n',Actual_Time);


% fprintf(fid1,'\n%3.0f correct for Actual Structure.\n',correct);
% fprintf(fid1,'\n%3.3f is the Actual Negative Log Likelihood Score.\n',BN_llhood);
% fprintf(fid1,'\n%3.0f unknown for Actual Structure.\n',unknown);
% fprintf(fid1,'\n%3.0f time for inference and learning for Actual Structure.\n',Actual_Time);

fprintf(fid2,'%3.0f,%3.3f,%3.0f,%3.2f,',correct,BN_llhood,unknown,Actual_Time);

clear x y xt yt engine evidence bnet_test correct unknown BN_llhood Actual_Time temp;

% **********************************************************************************************************************************
% **********************************************************************************************************************************
% **********************************************************************************************************************************
x=load('art_train_data.txt');
y=load('art_train_names.txt');
xt=load('art_test_data.txt');
yt=load('art_test_names.txt');

num_var = size(x,2);
length = size(x,1);
data=zeros(train_counter,2);

tic;
% Build a Maximally Weighted Tree
for var1 = 1:1:num_var
for var2 = 1:1:num_var
if var1~=var2
data(:,1) = x(:,var1);
data(:,2) = x(:,var2);
cmi_score(var1,var2)=cmi(data,y,2,2);
else
cmi_score(var1,var2)=0;
end
end
end

A = minimum_spanning_tree(-1*cmi_score);
% draw_graph(A)

% *****************************************************
% To Create BNET Structure for BNT
% *****************************************************

node_sizes = 2*ones(1,num_var+1);
T = mk_rooted_sptree(A,3);

T(N,1:(N-1))=1;

bnet1 = mk_bnet(T,node_sizes,'observed',observed_nodes);
seed=0;
rand('state',seed);

for node_bnet = 1:1:(N)
bnet1.CPD{node_bnet} = tabular_CPD(bnet1, node_bnet,'prior_type','dirichlet','dirichlet_weight',1,'dirichlet_type','unif');
end
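% The TANB structure above follows the usual Chow-Liu / TAN recipe: the
% class-conditional mutual information (cmi) between every attribute pair is
% used as an edge weight, a maximum-weight spanning tree is obtained by
% negating the scores, the tree is rooted at attribute 3 with mk_rooted_sptree,
% and the class node is then made a parent of every attribute.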

% draw_graph(bnet1.dag)
% *****************************************************
% To Learn Parameters
% *****************************************************
% the data has to be fed in with examples in columns
% the data to be fed in must be (1,2) and not (1,0)

x(:,size(x,2)+1)=y(:,1);
bnet1 = learn_params(bnet1,x');
TANB_learn_time = toc;
fprintf(fid,'\nTANB Structure\n');
drawT = full(T);

for i=1:1:N
for j=1:1:N
fprintf(fid,'%3.0f,',drawT(i,j));
end
fprintf(fid,'\n');
end
fprintf(fid,'\n');
% *****************************************************
% To Display the Learned Parameters
% *****************************************************
fprintf(fid,'\nDisplaying Learned Parameters for TANB.\n');
CPT3 = cell(1,N);
for i=1:N
s=struct(bnet1.CPD{i}); % violate object privacy
CPT3{i}=s.CPT;
fprintf(fid,'\nDisplaying Learned Parameters for node %3.d',i);
fprintf(fid,'\n');
display_CPT(fid,CPT3{i})
end
fprintf(fid,'\n');
% *****************************************************
% To Infer on the test case
% *****************************************************
tic;
engine = jtree_ndx_inf_engine(bnet1);
evidence = cell(1,num_var+1); % for inference, the instances should be fed in one at a time
class = zeros(size(xt,1),3);
llhood = zeros(size(xt,1),1);
correct = 0;

unknown = 0;
for row= 1:1:size(xt,1)
for col= 1:1:(num_var)
evidence{1,col}=xt(row,col);
end
engine = enter_evidence(engine,evidence');
m = marginal_nodes(engine,N);
% to output the marginals calculated into classes
class(row,1) = m.T(1);



class(row,2) = m.T(2);
if class(row,1)>class(row,2)
class(row,3) = 1;
llhood(row,1) = class(row,1);
elseif class(row,1)<class(row,2)
class(row,3) = 2;

llhood(row,1) = class(row,2);
else
class(row,3) = -1;
llhood(row,1) = 1;
end
end
% *****************************************************
% To compute the accuracy of classification
% *****************************************************
check = zeros(size(xt,1),1);
for row = 1:1:size(xt,1)
if class(row,3) == yt(row,1)
check(row,1)=1;
else
llhood(row,1)=class(row,yt(row,1));
end
end
% save('art_pred.txt','class','-ASCII','-tabs');
correct = size(find(check),1);
temp = class(:,3);
unknown = size(find(temp<-0.5),1);
BN_llhood = sum(-1*log(llhood));
TANB_infer_time = toc;
TANB_time=TANB_learn_time+TANB_infer_time;
fprintf(fid,'\n%3.0f correct for TANB Structure.\n',correct);
fprintf(fid,'\n%3.3f is the TANB Negative Log Likelihood Score.\n',BN_llhood);
fprintf(fid,'\n%3.0f unknown for TANB Structure.\n',unknown);
fprintf(fid,'\n%3.2f time for learning for TANB Structure.\n',TANB_learn_time);
fprintf(fid,'\n%3.2f time for inference for TANB Structure.\n',TANB_infer_time);
fprintf(fid,'\n%3.2f time for inference and learning for TANB Structure.\n',TANB_time);

fprintf(fid2,'%3.0f,%3.3f,%3.0f,%3.2f,%3.2f,%3.2f,',correct,BN_llhood,unknown,TANB_learn_time,TANB_infer_time,TANB_time);
clear x y xt yt engine evidence bnet1 correct unknown BN_llhood TANB_learn_time TANB_infer_time TANB_time;

% **********************************************************************************************************************************
% **********************************************************************************************************************************
% **********************************************************************************************************************************
x=load('art_train_data.txt');
y=load('art_train_names.txt');
xt=load('art_test_data.txt');
yt=load('art_test_names.txt');

tic;
% *****************************************************
% To Create FULL BNET Structure for BNT
% *****************************************************
x(:,size(x,2)+1)=y(:,1);
node_sizes = 2*ones(1,N);
node_count = 0;
order = zeros(1,N);
for order_row = 2:1:N
node_count = node_count+1;
order(1,order_row) = node_count;
end

order(1,1) = N;
max_parents = 5;
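% learn_struct_K2 requires a node ordering; placing the class node N first
% means it may act as a parent of the attributes but can never receive
% attribute parents, and 'max_fan_in' caps each node at max_parents parents.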

fullT = learn_struct_K2(x',node_sizes,order,'max_fan_in',max_parents);
bnet2 = mk_bnet(fullT,node_sizes,'observed',observed_nodes);
seed=0;
rand('state',seed);

for node_bnet1 = 1:1:N
bnet2.CPD{node_bnet1} = tabular_CPD(bnet2, node_bnet1,'prior_type','dirichlet','dirichlet_weight',1,'dirichlet_type','unif');
end

% draw_graph(bnet1.dag)

% *****************************************************
% To Learn Parameters
% *****************************************************
% the data has to be fed in with examples in columns
% the data to be fed in must be (1,2) and not (1,0)

bnet2 = learn_params(bnet2,x');

BN_learn_time = toc;

fprintf(fid,'\nK2 Learned BN Structure\n');
drawfullT = full(fullT);
for i=1:1:N
for j=1:1:N
fprintf(fid,'%3.0f,',drawfullT(i,j));
end
fprintf(fid,'\n');
end
fprintf(fid,'\n');

% *****************************************************
% To Display the Learned Parameters
% *****************************************************

fprintf(fid,'\nDisplaying Learned Parameters for Full BAN.\n');
CPT4 = cell(1,N);
for i=1:N
s=struct(bnet2.CPD{i}); % violate object privacy
CPT4{i}=s.CPT;
fprintf(fid,'\nDisplaying Learned Parameters for node %3.d',i);
fprintf(fid,'\n');
display_CPT(fid,CPT4{i})
end




fprintf(fid,'\n');

% *****************************************************
% To Infer on the test case
% *****************************************************
tic;
engine = jtree_ndx_inf_engine(bnet2);
evidence = cell(1,num_var+1); % for inference, the instances should be fed in one at a time
class = zeros(size(xt,1),3);
llhood = zeros(size(xt,1),1);
correct = 0;
unknown = 0;
for row= 1:1:size(xt,1)
for col= 1:1:(num_var)
evidence{1,col}=xt(row,col);
end
engine = enter_evidence(engine,evidence');
m = marginal_nodes(engine,N);
% to output the marginals calculated into classes
class(row,1) = m.T(1);
class(row,2) = m.T(2);
if class(row,1)>class(row,2)
class(row,3) = 1;
llhood(row,1) = class(row,1);
elseif class(row,1)<class(row,2)
class(row,3) = 2;
llhood(row,1) = class(row,2);
else
class(row,3) = -1;

llhood(row,1) = 1;
end
end
% *****************************************************
% To compute the accuracy of classification
% *****************************************************
check = zeros(size(xt,1),1);
for row = 1:1:size(xt,1)
if class(row,3) == yt(row,1)
check(row,1)=1;
else
llhood(row,1)=class(row,yt(row,1));
end
end
% save('art_pred_full.txt','class','-ASCII','-tabs');
correct = size(find(check),1);
temp = class(:,3);
unknown = size(find(temp<-0.5),1);
BN_llhood = sum(-1*log(llhood));
BN_infer_time = toc;
BN_time = BN_learn_time+BN_infer_time;



fprintf(fid,'\n%3.0f correct for Full BN.\n',correct);
fprintf(fid,'\n%3.3f is the BN Negative Log Likelihood Score.\n',BN_llhood);
fprintf(fid,'\n%3.0f unknown for FULL BN Structure.\n',unknown);
fprintf(fid,'\n%3.2f needed to learn for K2 algo.\n',BN_learn_time);
fprintf(fid,'\n%3.2f needed to infer for K2 algo.\n',BN_infer_time);
fprintf(fid,'\n%3.2f needed to infer and learn for K2 algo.\n',BN_time);
fprintf(fid,'\n*********************************************************************************\n');

% fprintf(fid1,'\n%3.0f correct for Full BN.\n',correct);
% fprintf(fid1,'\n%3.3f is the BN Negative Log Likelihood Score.\n',BN_llhood);
% fprintf(fid1,'\n%3.0f unknown for FULL BN Structure.\n',unknown);
% fprintf(fid1,'\n%3.2f needed to learn for K2 algo.\n',BN_learn_time);
% fprintf(fid1,'\n%3.2f needed to infer for K2 algo.\n',BN_infer_time);
% fprintf(fid1,'\n%3.2f needed to infer and learn for K2 algo.\n',BN_time);
% fprintf(fid1,'\n*********************************************************************************\n');
fprintf(fid2,'%3.0f,%3.3f,%3.0f,%3.2f,%3.2f,%3.2f,',correct,BN_llhood,unknown,BN_learn_time,BN_infer_time,BN_time);
clear x y xt yt engine evidence bnet2 correct unknown BN_llhood BN_learn_time BN_infer_time BN_time;
fprintf('\nEND OF SIMULATION for Node %3.0f with %3.0f Training Examples for Run %3.0f\n', N, train_counter,run);
end % End of Loop for one specific definition of parameters
fclose(fid);
fclose(fid1);
fclose(fid2);
end %
end % End of Loop for a run of a number of nodes

% *****************************************************

% Additional Functions
% *****************************************************
function display_CPT(fid,CPT)
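% Prints a tabular CPT to fid, one row per parent instantiation: ind2subv
% converts the linear index i into the vector of parent states, and the slice
% of CPT over its last dimension (the child) is printed as that row's distribution.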
n = ndims(CPT);
parents_size = size(CPT);
parents_size = parents_size(1:end-1);
child_size = size(CPT,n);
c = 1;
for i=1:prod(parents_size)
parent_inst = ind2subv(parents_size, i);
fprintf(fid, '%d ', parent_inst);
fprintf(fid, ': ');
index = num2cell([parent_inst 1]);
index{n} = ':';
fprintf(fid, '%6.4f ', CPT(index{:}));
fprintf(fid, '\n');
end
function param = gen_nb_param();
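% Appears intended to build a random CPT for a binary node with one binary
% parent: param = [p1 p2 1-p1 1-p2]. This helper is not called anywhere in this listing.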
param = rand(1,2);
param_temp=ones(1,2)-param;
param(1,3:4)=param_temp;
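% Usage sketch (assumption: the same Bayes Net Toolbox functions used above are
% on the path; bnet_nb and xrow are placeholder names): classify one binary
% test vector xrow (values 1/2) with a network whose class node is node N,
% exactly as the inference loops above do case by case.
%
%   engine = jtree_ndx_inf_engine(bnet_nb);
%   evidence = cell(1,N);                 % class node N left unobserved
%   for k = 1:1:(N-1)
%       evidence{1,k} = xrow(k);
%   end
%   engine = enter_evidence(engine,evidence');
%   m = marginal_nodes(engine,N);         % m.T is the posterior over the class
%   [best_p, predicted_class] = max(m.T);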


D:\CD\Noiseless Data\tanb_data_run.m

function tanb_data_run()
% Used to do inference testing for the TANB data structure
% This is used to test out when ordering is opposite to that stated for BN
% This is the corrected code

fid=fopen('TANB Data Structure Full Summary.txt','w');
fprintf(fid,'This is the Full details for TANB Data Structure with varying Data Size.');
fprintf(fid,'\n----------------------------------------------------------------------\n');
fclose(fid);

fid1=fopen('TANB Data Structure Dataset.txt','w');
fclose(fid1);

fid2=fopen('TANB Data Structure Summarised Results.txt','w');
fprintf(fid2,'Nodes\tRunNum\tTrainNum\tNBCorrect\tNBNegllhood\tNBUnknown\tNBtime\tTANBCorrect\tTANBNegllhood\tTANBUnknown\tTANBLearntime\tTANBInferTime\tTANBtime\tBNCorrect\tBNNegllhood\tBNUnknown\tBNLearnTime\tBNinferTime\tBNtime');
fclose(fid2);
train_counter_table = [100;200;300;400;500;1000];

for num_of_nodes = 20:20:100
for run = 1:1:20

fid=fopen('TANB Data Structure Full Summary.txt','a');
fid1=fopen('TANB Data Structure Dataset.txt','a');
fid2=fopen('TANB Data Structure Summarised Results.txt','a');

% *****************************************
% TANB Dataset Generating Structure
% *****************************************

N = num_of_nodes + 1;
dag = zeros(N);
dag(2:N-1,1:N-2)=(eye(N-2)); % each attribute k+1 is a parent of attribute k
dag(N,1:(N-1))=1; % Root Node to all nodes
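% The generating DAG is itself a tree-augmented structure: the attributes form
% a chain and the class node N is a parent of every attribute, so the sampled
% data contain genuine attribute-attribute dependencies.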
% *****************************************
% Creating Network
% *****************************************

false = 1; true = 2;
ns = 2*ones(1,N); % binary nodes

bnet = mk_bnet(dag,ns);

% *****************************************
% Inputting Parameters
% *****************************************

rand('state',sum(100*clock));
for nodule = 1:1:(N-1)
bnet.CPD{nodule} = tabular_CPD(bnet, nodule);
end

bnet.CPD{N} = tabular_CPD(bnet, N, [0.5 0.5]);

% *****************************************************
% To Display the Actual Parameters
% *****************************************************

fprintf(fid,'\nDisplaying Actual Parameters for Actual Network.\n');
CPT3 = cell(1,N);



for i=1:N
s=struct(bnet.CPD{i}); % violate object privacy
CPT3{i}=s.CPT;
fprintf(fid,'\nDisplaying Actual Parameters for node %2.0d',i);
fprintf(fid,'\n');
display_CPT(fid,CPT3{i})
end

fprintf(fid,'\n');
for train_counter_row = 1:1:6
train_counter = train_counter_table(train_counter_row,1);

fprintf(fid,'\nNumber of Nodes %3.0f\n',num_of_nodes);
fprintf(fid,'\nRun Number %3.0f\n',run);
fprintf(fid,'\n%3.0f training examples\n',train_counter);
fprintf(fid1,'\nNumber of Nodes %3.0f\n',num_of_nodes);
fprintf(fid1,'\nRun Number %3.0f\n',run);
fprintf(fid1,'\n%3.0f training examples\n',train_counter);
fprintf(fid2,'\n%3.0f\t%3.0f\t%3.0f\t',num_of_nodes,run,train_counter);
% *****************************************************
% To Generate Training Cases
% *****************************************************
seed = 0;
rand('state', seed);
randn('state', seed);
ncases_train = train_counter;
ncases_test = 10000;
initial_gen_data = zeros(N, ncases_train);
for m=1:ncases_train
initial_gen_data(:,m) = cell2num(sample_bnet(bnet));
end
fprintf(fid1,'\nTraining Data for Run %3.0f with %3.0f Training Examples\n',run,train_counter);
for i=1:1:ncases_train
for j=1:1:(N-1)
fprintf(fid1,'%3.0f,',initial_gen_data(j,i));
end
fprintf(fid1,'%3.0f',initial_gen_data(N,i));

fprintf(fid1,'\n');
end
train_data = initial_gen_data(1:(N-1),:)';
train_names = initial_gen_data(N,:)';
save('art_train_data.txt','train_data','-ASCII','-tabs');
save('art_train_names.txt','train_names','-ASCII','-tabs');
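% Unlike nb_data_run.m, this script also dumps every sampled training and test
% case to the Dataset file (fid1), in the loops above (training) and below (test).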
% *****************************************************
% To Generate Test Cases
% *****************************************************

initial_gen_tdata = zeros(N, ncases_test);
for m=1:ncases_test
initial_gen_tdata(:,m) = cell2num(sample_bnet(bnet));
end



fprintf(fid1,'\nTest Data Run %3.0f with %3.0f Training Examples\n',run,train_counter);
for i=1:1:ncases_test
for j=1:1:(N-1)
fprintf(fid1,'%3.0f,',initial_gen_tdata(j,i));
end
fprintf(fid1,'%3.0f',initial_gen_tdata(N,i));
fprintf(fid1,'\n');
end
test_data = initial_gen_tdata(1:(N-1),:)';
test_names = initial_gen_tdata(N,:)';
save('art_test_data.txt','test_data','-ASCII','-tabs');
save('art_test_names.txt','test_names','-ASCII','-tabs');

% *****************************************************************************************************************
% Calculating Accuracy of Naive Bayes
% *****************************************************************************************************************

x=load('art_train_data.txt');
y=load('art_train_names.txt');
xt=load('art_test_data.txt');
yt=load('art_test_names.txt');

num_var = size(x,2);
length = size(x,1);

observed_node_count = 0;
observed_nodes = zeros(1,N-1);
for observed_col = 1:1:(N-1)
observed_node_count = observed_node_count+1;
observed_nodes(1,observed_col) = observed_node_count;
end
% *****************************************************************************************************************
% Creating the Structure of NB
% *****************************************************************************************************************

nb_dag = zeros(N,N);
nb_dag(N,1:(N-1))=1; % Root Node to all nodes

bnet_test = mk_bnet(nb_dag,ns,'observed',observed_nodes);
seed = 0;
rand('state',seed);

for node_test = 1:1:(N)
bnet_test.CPD{node_test} = tabular_CPD(bnet_test, node_test);
end

tic;

% Learning Params
x(:,size(x,2)+1)=y(:,1);
bnet_test = learn_params(bnet_test,x');

% *****************************************************
% To Display the Learned Parameters
% *****************************************************



fprintf(fid,'\nDisplaying Learned Parameters using Actual Network Structure.\n');
CPT3 = cell(1,N);
for i=1:N
s=struct(bnet_test.CPD{i}); % violate object privacy
CPT3{i}=s.CPT;
fprintf(fid,'\nDisplaying Learned Parameters for node %2.0d',i);

fprintf(fid,'\n');
display_CPT(fid,CPT3{i})
end
fprintf(fid,'\n');

engine = jtree_ndx_inf_engine(bnet_test);
evidence = cell(1,num_var+1); % for inference, the instances should be fed in one at a time
class = zeros(size(xt,1),3);
llhood = zeros(size(xt,1),1);
correct = 0;
unknown = 0;
% Inference on Test
for row= 1:1:size(xt,1)
for col= 1:1:(num_var)
evidence{1,col}=xt(row,col);
end
engine = enter_evidence(engine,evidence');
m = marginal_nodes(engine,N);
% to output the marginals calculated into classes
class(row,1) = m.T(1);
class(row,2) = m.T(2);
if class(row,1)>class(row,2)
class(row,3) = 1;
llhood(row,1) = class(row,1);
elseif class(row,1)<class(row,2)
class(row,3) = 2;
llhood(row,1) = class(row,2);
else
class(row,3) = -1;

llhood(row,1) = 1;
end
end
% *****************************************************
% To compute the accuracy of classification
% *****************************************************
check = zeros(size(xt,1),1);
for row = 1:1:size(xt,1)
if class(row,3) == yt(row,1)
check(row,1)=1;
end
end
correct = size(find(check),1);
temp = class(:,3);
unknown = size(find(temp<-0.5),1);
BN_llhood = sum(-1*log(llhood));
NB_Time = toc;
fprintf(fid,'\n%3.0f correct for Actual Structure.\n',correct);


fprintf(fid,'\n%3.3f is the Actual Negative Log Likelihood Score.\n',BN_llhood);
fprintf(fid,'\n%3.0f unknown for Actual Structure.\n',unknown);
fprintf(fid,'\n%3.0f time for inference and learning for Actual Structure.\n',NB_Time);
% fprintf(fid1,'\n%3.0f correct for Actual Structure.\n',correct);
% fprintf(fid1,'\n%3.3f is the Actual Negative Log Likelihood Score.\n',BN_llhood);
% fprintf(fid1,'\n%3.0f unknown for Actual Structure.\n',unknown);
% fprintf(fid1,'\n%3.0f time for inference and learning for Actual Structure.\n',NB_Time);

fprintf(fid2,'%3.0f\t%3.3f\t%3.0f\t%3.2f\t',correct,BN_llhood,unknown,NB_Time);

clear x y xt yt engine evidence bnet_test correct unknown BN_llhood NB_Time temp;

% ******************************************************************************************************************************
% ******************************************************************************************************************************
% ******************************************************************************************************************************
x=load('art_train_data.txt');
y=load('art_train_names.txt');
xt=load('art_test_data.txt');
yt=load('art_test_names.txt');

num_var = size(x,2);
length = size(x,1);
data=zeros(train_counter,2);
tic;
% Build a Maximally Weighted Tree
for var1 = 1:1:num_var
for var2 = 1:1:num_var
if var1~=var2
data(:,1) = x(:,var1);
data(:,2) = x(:,var2);
cmi_score(var1,var2)=cmi(data,y,2,2);
else
cmi_score(var1,var2)=0;
end
end
end

A = minimum_spanning_tree(-1*cmi_score);
% draw_graph(A)

% *****************************************************
% To Create BNET Structure for BNT
% *****************************************************

node_sizes = 2*ones(1,num_var+1);
T = mk_rooted_sptree(A,3);

T(N,1:(N-1))=1;

bnet1 = mk_bnet(T,node_sizes,'observed',observed_nodes);
seed=0;
rand('state',seed);

for node_bnet = 1:1:(N)
bnet1.CPD{node_bnet} = tabular_CPD(bnet1, node_bnet);




end
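% Note: in this script the NB, TANB and K2 networks use plain maximum-likelihood
% tabular CPDs, whereas nb_data_run.m learns them with a uniform Dirichlet
% prior ('prior_type','dirichlet').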

% draw_graph(bnet1.dag)
% *****************************************************
% To Learn Parameters
% *****************************************************
% the data has to be fed in with examples in columns
% the data to be fed in must be (1,2) and not (1,0)

x(:,size(x,2)+1)=y(:,1);
bnet1 = learn_params(bnet1,x');
TANB_learn_time = toc;
fprintf(fid,'\nTANB Structure\n');
drawT = full(T);
for i=1:1:N
for j=1:1:N
fprintf(fid,'%3.0f\t',drawT(i,j));
end
fprintf(fid,'\n');
end
fprintf(fid,'\n');
% *****************************************************
% To Display the Learned Parameters
% *****************************************************
fprintf(fid,'\nDisplaying Learned Parameters for TANB.\n');
CPT3 = cell(1,N);
for i=1:N
s=struct(bnet1.CPD{i}); % violate object privacy
CPT3{i}=s.CPT;
fprintf(fid,'\nDisplaying Learned Parameters for node %3.d',i);

fprintf(fid,'\n');
display_CPT(fid,CPT3{i})
end
fprintf(fid,'\n');
% *****************************************************
% To Infer on the test case
% *****************************************************
tic;
engine = jtree_ndx_inf_engine(bnet1);
evidence = cell(1,num_var+1); % for inference, the instances should be fed in one at a time
class = zeros(size(xt,1),3);
llhood = zeros(size(xt,1),1);
correct = 0;
unknown = 0;

for row= 1:1:size(xt,1)
for col= 1:1:(num_var)
evidence{1,col}=xt(row,col);
end
engine = enter_evidence(engine,evidence');
m = marginal_nodes(engine,N);

% to output the marginals calculated into classes
class(row,1) = m.T(1);
class(row,2) = m.T(2);



if class(row,1)>class(row,2)
class(row,3) = 1;
llhood(row,1) = class(row,1);
elseif class(row,1)<class(row,2)
class(row,3) = 2;
llhood(row,1) = class(row,2);
else
class(row,3) = -1;
llhood(row,1) = 1;
end
end
% *****************************************************
% To compute the accuracy of classification
% *****************************************************
check = zeros(size(xt,1),1);
for row = 1:1:size(xt,1)
if class(row,3) == yt(row,1)
check(row,1)=1;
end
end
% save('art_pred.txt','class','-ASCII','-tabs');
correct = size(find(check),1);
temp = class(:,3);
unknown = size(find(temp<-0.5),1);
BN_llhood = sum(-1*log(llhood));
TANB_infer_time = toc;
TANB_time=TANB_learn_time+TANB_infer_time;
fprintf(fid,'\n%3.0f correct for TANB Structure.\n',correct);
fprintf(fid,'\n%3.3f is the TANB Negative Log Likelihood Score.\n',BN_llhood);
fprintf(fid,'\n%3.0f unknown for TANB Structure.\n',unknown);
fprintf(fid,'\n%3.2f time for learning for TANB Structure.\n',TANB_learn_time);
fprintf(fid,'\n%3.2f time for inference for TANB Structure.\n',TANB_infer_time);
fprintf(fid,'\n%3.2f time for inference and learning for TANB Structure.\n',TANB_time);
% fprintf(fid1,'\n%3.0f correct for TANB Structure.\n',correct);
% fprintf(fid1,'\n%3.3f is the TANB Negative Log Likelihood Score.\n',BN_llhood);
% fprintf(fid1,'\n%3.0f unknown for TANB Structure.\n',unknown);
% fprintf(fid1,'\n%3.2f time for learning for TANB Structure.\n',TANB_learn_time);
% fprintf(fid1,'\n%3.2f time for inference for TANB Structure.\n',TANB_infer_time);
% fprintf(fid1,'\n%3.2f time for inference and learning for TANB Structure.\n',TANB_time);

fprintf(fid2,'%3.0f\t%3.3f\t%3.0f\t%3.2f\t%3.2f\t%3.2f\t',correct,BN_llhood,unknown,TANB_learn_time,TANB_infer_time,TANB_time);

clear x y xt yt engine evidence bnet1 correct unknown BN_llhood TANB_learn_time TANB_infer_time TANB_time;

% ******************************************************************************************************************************
% ******************************************************************************************************************************
% ******************************************************************************************************************************
x=load('art_train_data.txt');
y=load('art_train_names.txt');
xt=load('art_test_data.txt');
yt=load('art_test_names.txt');



tic;
% *****************************************************
% To Create FULL BNET Structure for BNT
% *****************************************************
x(:,size(x,2)+1)=y(:,1);
node_sizes = 2*ones(1,N);
node_count = 0;
order = zeros(1,N);
for order_row = 2:1:N
node_count = node_count+1;
order(1,order_row) = node_count;
end
order(1,1) = N;
max_parents = 5;

fullT = learn_struct_K2(x',node_sizes,order,'max_fan_in',max_parents);
bnet2 = mk_bnet(fullT,node_sizes,'observed',observed_nodes);
seed=0;
rand('state',seed);
for node_bnet1 = 1:1:N
bnet2.CPD{node_bnet1} = tabular_CPD(bnet2, node_bnet1);
end
% draw_graph(bnet1.dag)
% *****************************************************
% To Learn Parameters
% *****************************************************
% the data has to be fed in with examples in columns
% the data to be fed in must be (1,2) and not (1,0)

bnet2 = learn_params(bnet2,x');
BN_learn_time = toc;
fprintf(fid,'\nK2 Learned BN Structure\n');
drawfullT = full(fullT);
for i=1:1:N
for j=1:1:N
fprintf(fid,'%3.0f\t',drawfullT(i,j));
end
fprintf(fid,'\n');
end
fprintf(fid,'\n');
% *****************************************************
% To Display the Learned Parameters
% *****************************************************
fprintf(fid,'\nDisplaying Learned Parameters for Full BAN.\n');
CPT4 = cell(1,N);
for i=1:N
s=struct(bnet2.CPD{i}); % violate object privacy
CPT4{i}=s.CPT;
fprintf(fid,'\nDisplaying Learned Parameters for node %3.d',i);
fprintf(fid,'\n');
display_CPT(fid,CPT4{i})
end



fprintf(fid,'\n');

% *****************************************************
% To Infer on the test case
% *****************************************************
tic;
engine = jtree_ndx_inf_engine(bnet2);
evidence = cell(1,num_var+1); % for inference, the instances should be fed in one at a time
class = zeros(size(xt,1),3);
llhood = zeros(size(xt,1),1);
correct = 0;
unknown = 0;
for row= 1:1:size(xt,1)
for col= 1:1:(num_var)
evidence{1,col}=xt(row,col);
end

engine = enter_evidence(engine,evidence');
m = marginal_nodes(engine,N);
% to output the marginals calculated into classes
class(row,1) = m.T(1);
class(row,2) = m.T(2);
if class(row,1)>class(row,2)
class(row,3) = 1;
llhood(row,1) = class(row,1);
elseif class(row,1)<class(row,2)
class(row,3) = 2;
llhood(row,1) = class(row,2);
else
class(row,3) = -1;
llhood(row,1) = 1;
end
end
% *****************************************************
% To compute the accuracy of classification
% *****************************************************
check = zeros(size(xt,1),1);
for row = 1:1:size(xt,1)
if class(row,3) == yt(row,1)
check(row,1)=1;
end
end
% save('art_pred_full.txt','class','-ASCII','-tabs');
correct = size(find(check),1);
temp = class(:,3);
unknown = size(find(temp<-0.5),1);
BN_llhood = sum(-1*log(llhood));

BN_infer_time = toc;
BN_time = BN_learn_time+BN_infer_time;

fprintf(fid,'\n%3.0f correct for Full BN.\n',correct);
fprintf(fid,'\n%3.3f is the BN Negative Log Likelihood Score.\n',BN_llhood);



fprintf(fid,'\n%3.0f unknown for FULL BN Structure.\n',unknown);
fprintf(fid,'\n%3.2f needed to learn for K2 algo.\n',BN_learn_time);
fprintf(fid,'\n%3.2f needed to infer for K2 algo.\n',BN_infer_time);
fprintf(fid,'\n%3.2f needed to infer and learn for K2 algo.\n',BN_time);
fprintf(fid,'\n*********************************************************************************\n');
% fprintf(fid1,'\n%3.0f correct for Full BN.\n',correct);
% fprintf(fid1,'\n%3.3f is the BN Negative Log Likelihood Score.\n',BN_llhood);
% fprintf(fid1,'\n%3.0f unknown for FULL BN Structure.\n',unknown);
% fprintf(fid1,'\n%3.2f needed to learn for K2 algo.\n',BN_learn_time);
% fprintf(fid1,'\n%3.2f needed to infer for K2 algo.\n',BN_infer_time);
% fprintf(fid1,'\n%3.2f needed to infer and learn for K2 algo.\n',BN_time);
% fprintf(fid1,'\n*********************************************************************************\n');

fprintf(fid2,'%3.0f\t%3.3f\t%3.0f\t%3.2f\t%3.2f\t%3.2f\t',correct,BN_llhood,unknown,BN_learn_time,BN_infer_time,BN_time);

clear x y xt yt engine evidence bnet2 correct unknown BN_llhood BN_learn_time BN_infer_time BN_time;

fprintf('\nEND OF SIMULATION for Node %3.0f with %3.0f Training Examples for Run %3.0f\n', N, train_counter,run);
end % End of Loop for one specific definition of parameters
fclose(fid);
fclose(fid1);
fclose(fid2);
end %
end % End of Loop for a run of a number of nodes

% *****************************************************
% Additional Functions
% *****************************************************

function display_CPT(fid,CPT)

n = ndims(CPT);
parents_size = size(CPT);
parents_size = parents_size(1:end-1);
child_size = size(CPT,n);
c = 1;
for i=1:prod(parents_size)
parent_inst = ind2subv(parents_size, i);
fprintf(fid, '%d ', parent_inst);
fprintf(fid, ': ');
index = num2cell([parent_inst 1]);
index{n} = ':';
fprintf(fid, '%6.4f ', CPT(index{:}));
fprintf(fid, '\n');
end


D:\CD\Noiseless Data\bn_data_run_20.m

function bn_data_run_20()
% Used to do inference testing for the BN data structure for 100 nodes
% This code involves noise in both training and testing sets
% This is the corrected code

fid=fopen('BN Data Structure Full Summary.txt','w');
fprintf(fid,'This is the Full details for NB Data Structure with varying Data Size.');
fprintf(fid,'\n----------------------------------------------------------------------\n');
fclose(fid);

fid1=fopen('BN Data Structure Dataset.txt','w');
fclose(fid1);

fid2=fopen('BN Data Structure Summarised Results.csv','w');
fprintf(fid2,'Nodes,RunNum,TrainNum,NBCorrect,NBNegllhood,NBUnknown,NBTime,TANBCorrect,TANBNegllhood,TANBUnknown,TANBLearntime,TANBInferTime,TANBtime,BNCorrect,BNNegllhood,BNUnknown,BNLearnTime,BNinferTime,BNtime');
fclose(fid2);
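% Besides the node count and run number, this script also sweeps num_of_links:
% in the generating DAG built below, each attribute may be a parent of at most
% the next num_of_links attributes, so the data come from progressively denser
% networks as num_of_links grows.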
train_counter_table = [100;200;300;400;500;1000];

for num_of_nodes = 20:20:20
for num_of_links = 5:5:20
for run = 1:1:20

fid=fopen('BN Data Structure Full Summary.txt','a');
fid1=fopen('BN Data Structure Dataset.txt','a');
fid2=fopen('BN Data Structure Summarised Results.csv','a');

% *****************************************
% Structure
% *****************************************

N = num_of_nodes + 1;
initial = ones(num_of_nodes);
first = triu(initial,1);
second = tril(first,num_of_links);
second(N,:)=zeros(1,num_of_nodes);
second(:,N)=zeros(N,1);
second(N,1:(N-1))=1; % Root Node to all nodes
dag = second;
clear initial first second;

% *****************************************
% Creating Network
% *****************************************
observed_node_count = 0;
observed_nodes = zeros(1,N-1);
for observed_col = 1:1:(N-1)
observed_node_count = observed_node_count+1;
observed_nodes(1,observed_col) = observed_node_count;
end

false = 1; true = 2;
ns = 2*ones(1,N); % binary nodes

bnet = mk_bnet(dag,ns,'observed',observed_nodes);

% *****************************************
% Inputting Parameters
% *****************************************

rand('state',sum(100*clock));

for nodule = 1:1:(N-1)
bnet.CPD{nodule} = tabular_CPD(bnet, nodule);
end
bnet.CPD{N} = tabular_CPD(bnet, N, [0.5 0.5]);
% *****************************************************
% To Display the Actual Parameters
% *****************************************************
% fprintf(fid,'\nDisplaying Actual Parameters for Actual Network.\n');
% CPT3 = cell(1,N);
% for i=1:N
% s=struct(bnet.CPD{i}); % violate object privacy
% CPT3{i}=s.CPT;
% fprintf(fid,'\nDisplaying Actual Parameters for node %2.0d',i);
% fprintf(fid,'\n');
% display_CPT(fid,CPT3{i})
% end

fprintf(fid,'\n');
for train_counter_row = 1:1:6
train_counter = train_counter_table(train_counter_row,1);

fprintf(fid,'\nNumber of Nodes %3.0f\n',num_of_nodes);
fprintf(fid,'\nRun Number %3.0f\n',run);
fprintf(fid,'\n%3.0f training examples\n',train_counter);
fprintf(fid1,'\nNumber of Nodes %3.0f\n',num_of_nodes);
fprintf(fid1,'\nRun Number %3.0f\n',run);
fprintf(fid1,'\n%3.0f training examples\n',train_counter);
fprintf(fid2,'\n%3.0f,%3.0f,%3.0f,',num_of_nodes,run,train_counter);
% *****************************************************
% To Generate Training Cases
% *****************************************************
seed = 0;
rand('state', seed);
randn('state', seed);
ncases_train = train_counter;
ncases_test = 10000;
initial_gen_data = zeros(N, ncases_train);
for m=1:ncases_train
initial_gen_data(:,m) = cell2num(sample_bnet(bnet));
end
train_data = initial_gen_data(1:(N-1),:)';
train_names = initial_gen_data(N,:)';
save('art_train_data.txt','train_data','-ASCII','-tabs');
save('art_train_names.txt','train_names','-ASCII','-tabs');
nodes_string = int2str(num_of_nodes);
run_string = int2str(run);
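% Each sampled training and test set is also written to its own CSV file
% (via dlmwrite below), named after the node count, maximum link count,
% run number and training-set size.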

train_string = int2str(train_counter);
num_of_links_string = int2str(num_of_links);

train_filename = strcat(nodes_string,'_nodes_',num_of_links_string,'_max_links_',run_string,'_run_',train_string,'_train_num_TRAIN.csv');
dlmwrite(train_filename,initial_gen_data');

% *****************************************************
% To Generate Test Cases
% *****************************************************

initial_gen_tdata = zeros(N, ncases_test);
for m=1:ncases_test
initial_gen_tdata(:,m) = cell2num(sample_bnet(bnet));
end

fprintf(fid1,'\nTest Data Run %3.0f with %3.0f Training Examples\n',run,train_counter);

test_data = initial_gen_tdata(1:(N-1),:)';
test_names = initial_gen_tdata(N,:)';

save('art_test_data.txt','test_data','-ASCII','-tabs');
save('art_test_names.txt','test_names','-ASCII','-tabs');

test_filename = strcat(nodes_string,'_nodes_',num_of_links_string,'_max_links_',run_string,'_run_',train_string,'_train_num_TEST.csv');
dlmwrite(test_filename,initial_gen_tdata');
% *****************************************************************************************************************************
% Calculating Accuracy of Actual Prediction or Bayes Error
% *****************************************************************************************************************************
%
% xt=load('art_test_data.txt');
% yt=load('art_test_names.txt');
%
% engine = jtree_ndx_inf_engine(bnet);
% evidence = cell(1,N); % for inference, the instances should be fed in one at a time
% class = zeros(size(xt,1),3);
% llhood = zeros(size(xt,1),1);
% correct = 0;
% unknown = 0;
%
% Inference on Test
%
% for row= 1:1:size(xt,1)
% for col= 1:1:(N-1)
% evidence{1,col}=xt(row,col);
% end
% engine = enter_evidence(engine,evidence');
% m = marginal_nodes(engine,N);
%
% to output the marginals calculated into classes
% class(row,1) = m.T(1);
% class(row,2) = m.T(2);
% if class(row,1)>class(row,2)
% class(row,3) = 1;
% llhood(row,1) = class(row,1);
% elseif class(row,1)<class(row,2)
% class(row,3) = 2;
% llhood(row,1) = class(row,2);
% else
% class(row,3) = -1;
% llhood(row,1) = 1;
% end
% end
%
% *****************************************************
% To compute the accuracy of classification
% *****************************************************
% check = zeros(size(xt,1),1);
% for row = 1:1:size(xt,1)
% if class(row,3) == yt(row,1)
% check(row,1)=1;
% else
% llhood(row,1)=class(row,yt(row,1));
% end
% end
% correct = size(find(check),1);
% temp = class(:,3);
% unknown = size(find(temp<-0.5),1);
% BN_llhood = sum(-1*log(llhood));
% fprintf(fid,'\n%3.0f correct for Actual Structure.\n',correct);
% fprintf(fid,'\n%3.3f is the Actual Negative Log Likelihood Score.\n',BN_llhood);
% fprintf(fid,'\n%3.0f unknown for Actual Structure.\n',unknown);
%
% fprintf(fid2,'%3.0f,%3.3f,%3.0f,',correct,BN_llhood,unknown);
% clear xt yt engine evidence correct unknown BN_llhood temp;

% *****************************************************************************************************************************
% Calculating Accuracy of Naive Bayes Prediction
% *****************************************************************************************************************************

x=load('art_train_data.txt');
y=load('art_train_names.txt');
xt=load('art_test_data.txt');
yt=load('art_test_names.txt');

num_var = size(x,2);
length = size(x,1);

dag_test = zeros(N,N);
dag_test(N,1:(N-1))=1; % Root Node to all nodes

bnet_test = mk_bnet(dag_test,ns,'observed',observed_nodes);
seed = 0;
rand('state',seed);

for node_test = 1:1:(N)
bnet_test.CPD{node_test} = tabular_CPD(bnet_test, node_test,'prior_type','dirichlet','dirichlet_weight',1,'dirichlet_type','unif');
end

tic;

% Learning Params
x(:,size(x,2)+1)=y(:,1);
bnet_test = learn_params(bnet_test,x');

% *****************************************************
% To Display the Learned Parameters


