/*----------------------------------------------------------*/ /* Agglomerative Hierarchical Clustering */ /*----------------------------------------------------------*/ /* (C) 2001 Zdravko Markov */ /*----------------------------------------------------------*/ /* cluster(Mode,Threshold,Clustering) - returns Clustering */ /* cluster(Mode,Threshold) - prints clustering tree */ /*----------------------------------------------------------*/ /* Mode=lgg_m -> Lgg-based agglomerative clustering. */ /* Minimizing coverage of examples */ /* Merging stops when min coverage>Threshold. */ /*----------------------------------------------------------*/ /* Mode=lgg_s -> Lgg-based agglomerative clustering. */ /* Maximizing size of the lgg */ /* Merging stops when size <= Threshold. */ /*----------------------------------------------------------*/ /* Mode=min -> Single-linkage agglomerative clustering. */ /* Merging stops when distance > Threshold. */ /*----------------------------------------------------------*/ /* Mode=max -> Complete-linkage agglomerative clustering. */ /* Merging stops when distance > Threshold. */ /*----------------------------------------------------------*/ /* show(Clustering,Pos) - prints distance-based clustering */ /* with the root node at Pos. */ /*----------------------------------------------------------*/ /* show_lgg([],Clustering,Pos) - prints lgg-based clustering*/ /* with the root node at Pos. */ /*----------------------------------------------------------*/ cluster(lgg_m,Threshold) :- !, cluster(lgg_m,Threshold,Clustering), show_lgg([],Clustering,0). cluster(lgg_s,Threshold) :- !, cluster(lgg_s,Threshold,Clustering), show_lgg([],Clustering,0). cluster(Mode,Threshold) :- cluster(Mode,Threshold,Clustering), show(Clustering,0). cluster(lgg_m,Threshold,Clustering) :- !, findall(X,example(_,_,X),E), cl_m(E,Clustering,Threshold). cluster(lgg_s,Threshold,Clustering) :- !, findall(X,example(_,_,X),E), cl_s(E,Clustering,Threshold). cluster(Mode,Threshold,Clustering) :- (Mode=min; Mode=max), !, findall(Id,example(Id,_,_),E), cluster(E,Clustering,Mode,Threshold). cluster(Mode,_,_) :- write(Mode-'wrong mode'), fail. /*---------------- Lgg-based clustering --------------------*/ cl_m(E,[C|Clustering],Threshold) :- findall(C/D,(pair(E,A,B),lgg(A,B,C),model(C,D)),All), minimum(All,C/D), D < Threshold + 1, !, remove_subsumed(E,C,R), cl_m([C|R],Clustering,Threshold). cl_m(_,[],_). cl_s(E,[C|Clustering],Threshold) :- findall(C/D,(pair(E,A,B),lgg(A,B,C),length(C,D)),All), maximum(All,C/D), D > Threshold, !, remove_subsumed(E,C,R), cl_s([C|R],Clustering,Threshold). cl_s(_,[],_). lgg(H1,H2,LGG) :- intersection(H1,H2,LGG). model(H,D) :- findall(N,(example(N,_,L),subset(H,L)),M), length(M,D). remove_subsumed([],_,[]) :- !. remove_subsumed([X|T],C,L) :- subset(C,X), !, remove_subsumed(T,C,L). remove_subsumed([X|T],C,[X|L]) :- remove_subsumed(T,C,L). /*--------------- Distance-based clustring -----------------*/ cluster(E,Clustering,Mode,Threshold) :- findall([A,B]/D,(pair(E,A,B),distance(A,B,D,Mode)),All), minimum(All,[A,B]/D), D < Threshold + 1, !, del(A,E,E1), del(B,E1,E2), cluster([[A,B]|E2],Clustering,Mode,Threshold). cluster(E,E,_,_). distance([W,X],[Y,Z],D,M) :- !, distance(W,Y,D1,M), distance(W,Z,D2,M), distance(X,Y,D3,M), distance(X,Z,D4,M), min_max(M,[D1,D2,D3,D4],D). distance([X,Y],Z,D,M) :- !, distance(X,Z,D1,M), distance(Y,Z,D2,M), min_max(M,[D1,D2],D). distance(X,[Y,Z],D,M) :- !, distance(X,Y,D1,M), distance(X,Z,D2,M), min_max(M,[D1,D2],D). distance(X,Y,D,_) :- example(X,_,A), example(Y,_,B), dist(A,B,D). min_max(min,L,D) :- !, min(L,D). min_max(max,L,D) :- max(L,D). dist([],[],0) :- !. dist([X|T],[X|V],N) :- !, dist(T,V,N). dist([_|T],[_|V],N) :- dist(T,V,M), N is M+1. /*------------- Cluster Evaluation ------------------------*/ evaluate(X,Accuracy,Class) :- setof(C,N^E^example(N,C,E),L), distribution(L,X,D,Total), maximum(D,Class/Max), Accuracy is Max/Total. distribution([],_,[],0) :- !. distribution([C|T],X,[C/D|V],S) :- findall(C,(example(_,C,Y),subset(X,Y)),L), length(L,D), !, distribution(T,X,V,S1), S is S1+D. /*------- Show distance-based clustering hierarchy ---------*/ show([[H|T]],P) :- !, show([H|T],P). show([X|Y],P) :- !, tab(P), write(+), nl, P1 is P+2, show(X,P1), showl(Y,P1). show(X,P) :- tab(P), example(X,C,_), !, write(X-C),nl. showl([],_) :- !. showl([X|T],P) :- show(X,P), showl(T,P). /*---------- Show lgg-based clustering hierarchy -----------*/ show_lgg(C,L,P) :- findall(E,example(_,_,E),Es), append(Es,L,All), show(C,All,P). show(C,L,P) :- evaluate(C,A,Class), !, children(L,C,Cl), P1 is P+2, tab(P), write_node(C,A,Class), nl, show_list(Cl,L,P1). write_node(X,_,C) :- example(N,C,E), subset(X,E), subset(E,X), !, write(N-C). write_node(X,A,C) :- write(X-A-C). show_list([],_,_) :- !. show_list([X|T],L,P) :- show(X,L,P), show_list(T,L,P). children([],_,[]) :- !. children([X|T],C,[X|S]) :- X\=C, subset(C,X), \+ (member(Y,T),Y\=C,subset(C,Y),subset(Y,X)), !, children(T,C,S). children([_|T],C,S) :- children(T,C,S). /*--------------------- Utilities --------------------------*/ minimum([X],X) :- !. minimum([X/M|T],Y/N) :- minimum(T,Z/K), (MK,Y/N=X/M ; Y/N=Z/K), !. min([X],X) :- !. min([M|T],N) :- min(T,K), (MK,N=M ; N=K), !. pair([A|L],A,B) :- member(B,L). pair([_|L],A,B) :- pair(L,A,B). del(X,[X|T],T) :- !. del(X,[Y|T],[Y|L]) :- del(X,T,L). subset([],_). subset([X|T],L) :- member(X,L), !, subset(T,L). intersection([],_,[]) :- !. intersection([X|T],L,[X|V]) :- member(X,L), !, intersection(T,L,V). intersection([_|T],L,V) :- intersection(T,L,V).