@@ -825,12 +825,14 @@ void clueweb() {
825825 }
826826
827827 // k-tree
828- if (false ) {
828+ if (true ) {
829+ // build tree
829830 int m = 1000 , maxiters = 10 ;
830831 cout << " -----" << endl;
831832 cout << " Building K-tree of order m=" << m
832833 << " , k-means maxiters=" << maxiters << endl;
833834 boost::timer::auto_cpu_timer all;
835+ boost::timer::auto_cpu_timer building;
834836 KTree<vecType, clustererType, distanceType, protoType> kt (m, maxiters);
835837 kt.setDelayedUpdates (true );
836838 kt.setUpdateDelay (1000 );
@@ -845,16 +847,24 @@ void clueweb() {
845847 }
846848 }
847849 cout << endl;
848- cout << " rearranging K-tree" << endl;
850+ building.stop ();
851+ cout << " Building K-tree took " << building.elapsed ().wall / 1e9 << " seconds" << endl;
852+
853+ // rearrange leaves
854+ boost::timer::auto_cpu_timer rearranging;
855+ cout << " Rearranging K-tree" << endl;
849856 kt.rearrange ();
857+ cout << " Rearranging K-tree took " << rearranging.elapsed ().wall / 1e9 << " seconds" << endl;
858+
859+ // print stats
850860 all.stop ();
851861 kt.printStats ();
852862 double seconds = all.elapsed ().wall / 1e9 ;
853- cout << " Building K-tree took " << seconds << " seconds" << endl;
863+ cout << " K-tree took " << seconds << " seconds" << endl;
854864 }
855865
856866 // EM-tree
857- if (true ) {
867+ if (false ) {
858868 int maxiters = 4 ;
859869 int clusters = 110000 ;
860870 int m = (int )sqrt (clusters);
@@ -902,52 +912,67 @@ void clueweb() {
902912
903913 // TSVQ EM-tree hybrid
904914 if (false ) {
905- int clusters = 110000 ;
906- int m = (int )sqrt (clusters);
907- int depth = 3 ;
908- int maxiters = 4 ;
909- int sampleSize = 2000000 ;
915+ // record time for all operations
916+ boost::timer::auto_cpu_timer all;
910917
911918 // sample data
919+ int sampleSize = 2000000 ;
912920 vector < SVector<bool >*> sample = vectors;
913921 random_shuffle (sample.begin (), sample.end ());
914922 sample.resize (sampleSize);
915-
916- // record time for all operations
917- boost::timer::auto_cpu_timer all;
918-
923+
919924 // build TSVQ on sample
925+ int clusters = 110000 ;
926+ int m = (int )sqrt (clusters);
927+ int depth = 3 ;
928+ int tsvqMaxiters = 5 ;
920929 boost::timer::auto_cpu_timer tsvqTimer;
921- TSVQ<vecType, clustererType, distanceType, protoType> tsvq (m, depth, maxiters);
922- tsvqTimer.start ();
930+ TSVQ<vecType, clustererType, distanceType, protoType> tsvq (m, depth, tsvqMaxiters);
923931 tsvq.cluster (sample);
924932 tsvqTimer.stop ();
925933 tsvq.printStats ();
926934 cout << endl << " Building TSVQ on sample took " << tsvqTimer.elapsed ().wall / 1e9 << " seconds" << endl;
927935 cout << " --------" << endl;
928936
929937 // 2 iterations of EM-tree on all data, using TSVQ sample as seed
938+ int emtreeMaxiters = 2 ;
930939 EMTree<vecType, clustererType, distanceType, protoType> emtree (tsvq.getMWayTree ());
931940 boost::timer::auto_cpu_timer emtreeTimer;
932941 {
933942 boost::timer::auto_cpu_timer iter;
934- emtree.EMStep (vectors);
943+
944+ // place all data into TSVQ initialized tree
945+ emtree.replace (vectors);
946+ cout << " placed all points into TSVQ tree" << endl;
947+ emtree.printStats ();
948+ cout << endl << " --------" << endl;
949+
950+ // prune
951+ int pruned = 1 ;
952+ while (pruned > 0 ) {
953+ pruned = emtree.prune ();
954+ }
955+
956+ // update means
957+ emtree.rebuildInternal ();
958+
959+ // print stats
935960 iter.stop ();
936961 cout << " iteration 1 took " << iter.elapsed ().wall / 1e9 << " seconds" << endl;
937- cout << " RMSE = " << emtree.getRMSE ();
938- cout << " --------" << endl;
962+ emtree.printStats ();
963+ cout << endl << " --------" << endl;
939964 }
940- for (int i = 1 ; i < maxiters ; ++i) {
965+ for (int i = 1 ; i < emtreeMaxiters ; ++i) {
941966 boost::timer::auto_cpu_timer iter;
942967 emtree.EMStep ();
943968 iter.stop ();
944969 cout << " iteration " << i + 1 << " took " << iter.elapsed ().wall / 1e9 << " seconds" << endl;
945- cout << " RMSE = " << emtree.getRMSE ();
970+ emtree.printStats ();
946971 cout << " --------" << endl;
947972 }
948973 emtreeTimer.stop ();
949974 emtree.printStats ();
950- cout << endl << " 2 iterations of EM-tree took " << emtreeTimer.elapsed ().wall / 1e9 << " seconds" << endl;
975+ cout << endl << emtreeMaxiters << " iterations of EM-tree took " << emtreeTimer.elapsed ().wall / 1e9 << " seconds" << endl;
951976
952977 // report all time
953978 all.stop ();
0 commit comments