diff --git a/PerformanceTesting/TestSummary.rst b/PerformanceTesting/TestSummary.rst index df3fb15..de70a45 100644 --- a/PerformanceTesting/TestSummary.rst +++ b/PerformanceTesting/TestSummary.rst @@ -88,6 +88,7 @@ These tests create lots of records, and test how different sources work | 01bf - write xml to a disk file, uncompressed | 01bg - write variable size to disk, compressed | 01bh - write large variable size to disk, compressed +| 01bi - write very large variable size to disk, compressed 01c - Raw disk read speed [class: diskread] ------------------------------------------- @@ -100,6 +101,7 @@ These tests create lots of records, and test how different sources work | 01cf - read xml to a disk file, uncompressed | 01cg - read variable size from disk, compressed | 01ch - read large variable size from disk, compressed +| 01ci - read very large variable size from disk, compressed 01d - Parallel disk write speed [class: diskread,parallel] ---------------------------------------------------------- @@ -207,6 +209,7 @@ TBD:01h - Limits on index reads [class: indexread] | 03bd - Distribute all rows to node self+CLUSTERSIZE/2. 
| 03be - Distribute variable size rows | 03bf - Distribute large variable size rows +| 03bg - Distribute very large variable size rows 03c - Parallel Distribution [class: distribute] ----------------------------------------------- diff --git a/PerformanceTesting/ecl/01bi_writevlarge.ecl b/PerformanceTesting/ecl/01bi_writevlarge.ecl new file mode 100644 index 0000000..c9347bc --- /dev/null +++ b/PerformanceTesting/ecl/01bi_writevlarge.ecl @@ -0,0 +1,14 @@ +//class=disk +//class=diskwrite +//class=setup + +#option ('hthorMemoryLimit', '4000'); +#option ('hthorDiskWriteSizeLimit', 100000000000); + +import $ as suite; +import suite.perform.config; +import suite.perform.format; +import suite.perform.files; + +ds := DATASET(config.vlargeRecordCount, format.createGrandParent(COUNTER, 1000 + COUNTER, 900 + COUNTER), DISTRIBUTED); /* generates config.vlargeRecordCount rows; COUNTER is passed as the id and both child counts (1000 + COUNTER, 900 + COUNTER) grow with it */ +OUTPUT(ds,,files.simpleName+'_vlarge',overwrite,compressed); /* writes the generated rows, compressed, to the file named files.simpleName+'_vlarge', overwriting any existing copy */ diff --git a/PerformanceTesting/ecl/01ci_countvlarge.ecl b/PerformanceTesting/ecl/01ci_countvlarge.ecl new file mode 100644 index 0000000..b305689 --- /dev/null +++ b/PerformanceTesting/ecl/01ci_countvlarge.ecl @@ -0,0 +1,15 @@ +//class=disk +//class=quick +//class=diskread + +//noroxie - almost certainly will cause timeout issues due to sending remote rows. Should revisit. 
+ +#option ('hthorMemoryLimit', '4000'); +import ^ as root; /* NOTE(review): "root" is not referenced anywhere else in this file - confirm whether this import is needed */ +import $ as suite; +import suite.perform.config; +import suite.perform.files; +import suite.perform.format; + +ds := DATASET(files.simpleName+'_vlarge', format.grandParentRec, FLAT); /* reads the file named files.simpleName+'_vlarge' using the grandParentRec layout */ +OUTPUT(COUNT(NOFOLD(ds)) = config.vlargeRecordCount); /* outputs true when the number of rows read equals config.vlargeRecordCount */ diff --git a/PerformanceTesting/ecl/03bg_distributevlarge.ecl b/PerformanceTesting/ecl/03bg_distributevlarge.ecl new file mode 100644 index 0000000..455e9ba --- /dev/null +++ b/PerformanceTesting/ecl/03bg_distributevlarge.ecl @@ -0,0 +1,15 @@ +//class=memory +//class=distribute +//nohthor +//noroxie + +import $ as suite; +import suite.perform.config; +import suite.perform.format; +import suite.perform.files; + +ds := DATASET(config.vlargeRecordCount, format.createGrandParent(COUNTER, 1000 + COUNTER, 900 + COUNTER), DISTRIBUTED); /* same row generator call as 01bi_writevlarge, but the rows are not written to disk here */ + +d := distribute(ds, hash32(id)); /* redistributes the generated rows by hash32 of the id field */ + +output(COUNT(NOFOLD(d)) = config.vlargeRecordCount); /* outputs true when no rows were lost or duplicated by the distribute */ diff --git a/PerformanceTesting/ecl/key/01bi_writevlarge.xml b/PerformanceTesting/ecl/key/01bi_writevlarge.xml new file mode 100644 index 0000000..aa3e90a --- /dev/null +++ b/PerformanceTesting/ecl/key/01bi_writevlarge.xml @@ -0,0 +1,2 @@ + + diff --git a/PerformanceTesting/ecl/key/01ci_countvlarge.xml b/PerformanceTesting/ecl/key/01ci_countvlarge.xml new file mode 100644 index 0000000..44a8709 --- /dev/null +++ b/PerformanceTesting/ecl/key/01ci_countvlarge.xml @@ -0,0 +1,3 @@ + + true + diff --git a/PerformanceTesting/ecl/key/03bg_distributevlarge.xml b/PerformanceTesting/ecl/key/03bg_distributevlarge.xml new file mode 100644 index 0000000..44a8709 --- /dev/null +++ b/PerformanceTesting/ecl/key/03bg_distributevlarge.xml @@ -0,0 +1,3 @@ + + true + diff --git a/PerformanceTesting/ecl/perform/config.ecl b/PerformanceTesting/ecl/perform/config.ecl index 4f5375e..26df9ce 100644 --- a/PerformanceTesting/ecl/perform/config.ecl +++ b/PerformanceTesting/ecl/perform/config.ecl @@ -15,6 +15,7 @@ export config := MODULE export largeRecordCountPerSlave := 100; // Total 
serialized memory ~4GB export largeRecordCount := largeRecordCountPerSlave * numSlaves; export largeRecordChildren := 500000; // Total size approx 40MB per row + export vlargeRecordCount := 100 * numSlaves; /* total row count (100 * numSlaves) used by the very-large-row tests 01bi, 01ci and 03bg */ export variableRecordCount := largeRecordCount * 10000; export variableRecordChildren := largeRecordChildren / 10000; // Total size approx 4K per row diff --git a/PerformanceTesting/ecl/perform/format.ecl b/PerformanceTesting/ecl/perform/format.ecl index 6ae2d09..121b459 100644 --- a/PerformanceTesting/ecl/perform/format.ecl +++ b/PerformanceTesting/ecl/perform/format.ecl @@ -16,6 +16,12 @@ export format := MODULE DATASET(paddedRec) children; END; + export grandParentRec := RECORD /* an id plus two separate child datasets (c1, c2) of parentRec rows */ + unsigned id; + DATASET(parentRec) c1; + DATASET(parentRec) c2; + END; + export simpleRec mkSimple(unsigned8 id1, unsigned8 id2, unsigned8 id3, unsigned8 id4) := TRANSFORM SELF.id1 := id1; SELF.id2 := id2; @@ -52,4 +58,12 @@ export format := MODULE SELF.children := DATASET(numChildren, createPadded(id + startChild)); END; + export grandParentRec createGrandParent(unsigned8 id, unsigned4 numChildren1, unsigned4 numChildren2) := TRANSFORM /* fills one grandParentRec: id is copied through; c1 gets numChildren1 rows and c2 gets numChildren2 rows, each built by createParent with a hash-derived third argument (id2 for c1, id3 for c2) */ + id2 := HASH64(id); + id3 := HASH64(id2); + SELF.id := id; + SELF.c1 := DATASET(numChildren1, createParent(id, 800, id2)); + SELF.c2 := DATASET(numChildren2, createParent(id, 1000, id3)); + END; + END;