Algorithms: Does the data-centric "push" model for SQL evaluation not make sense?

The traditional way to implement a SQL database is to parse the query, build a tree of operators (the query plan) and then evaluate it using the "volcano" model. In this model, you call .next() on the root node of the tree to get the next tuple, and each node in turn calls .next() on its child nodes, so tuples filter up through the tree.

In 2011, the paper "Efficiently Compiling Efficient Query Plans for Modern Hardware" introduced the data-centric query evaluation model. The theory is that calling .next() over and over again is expensive and leads to poor data locality. Instead, the author proposes a model in which children push tuples to their parents in a tight loop.

A number of other papers have cited this paper, and everyone seems to think it is the way to go. They report huge speedups for in-memory databases.

This does not make any sense to me. Pushing a tuple up the tree takes just as many function calls as pulling it up would. And at the nodes where tuples must be accumulated rather than passed along one at a time ("pipeline breakers"), both models behave identically.

So I benchmarked it: push vs. pull vs. hand-rolled code. I used a slight variation of the push model suggested by "How to Architect a Query Compiler, Revisited", in which you pass callbacks down the tree.

I simulate the evaluation of this query: `select right.a, right.b from left, right where left.b > 90 and left.a = right.a;`, where each table has 10 million randomly generated rows held in memory.

Here are the results (Java 11, on a Mac Mini):

push rows per run: 8606256.0 elapsed: 147932 millis. 1479.32 millis per run.
pull rows per run: 8606256.0 elapsed: 141929 millis. 1419.29 millis per run.
hand-rolled rows per run: 8606256.0 elapsed: 137589 millis. 1375.89 millis per run.

Bottom line: almost zero difference between the three implementations. Certainly not the orders of magnitude suggested by some of the papers.

So, what am I missing here? Is push-based evaluation pointless? Have I implemented the benchmark incorrectly? Or is the JVM so smart that it simply optimizes away all the calls to .next(), so the push model has no advantage? (I doubt it, because the author used Scala on the JVM, which should get the same optimizations.)

Here is the benchmark code:

/**
 * Micro-benchmark that evaluates one fixed query three ways and prints the
 * elapsed time of each: with push-style operators (children call a callback
 * for every row), with pull-style operators (parents call {@code next()}),
 * and with a hand-written pair of loops.
 *
 * <p>The query being simulated is:
 * {@code select right.a, right.b from left, right
 *        where left.b > 90 and left.a = right.a;}
 *
 * <p>Requires {@code java.util.ArrayList}, {@code java.util.HashMap},
 * {@code java.util.Iterator}, {@code java.util.List} and
 * {@code java.util.Random} to be imported by the enclosing file.
 */
public class BenchmarkPushModel {

    /** Number of rows generated for each of the two tables. */
    static final int ROW_COUNT = 10000000;
    /** Number of times each evaluation strategy is repeated. */
    static final int RUN_COUNT = 100;
    /** Length of the random lower-case string stored in {@link Row#a}. */
    static final int LEN_STRING_FIELD = 4;
    /** Exclusive upper bound of the random value stored in {@link Row#b}. */
    static final int MAX_VAL_INT_FIELD = 100;
    /** Rows of the left table pass the filter only when {@code b} is greater than this. */
    static final int INT_FIELD_PREDICATE_THRESHOLD = 90;

    /** Fixed seed so every execution generates the same tables. */
    Random rand = new Random(0);
    /** Result rows counted so far in the current phase (summed over all runs). */
    int rowsOut;
    /** Label of the phase most recently started via {@link #start(String)}. */
    String label;
    /** {@code System.nanoTime()} reading taken when the current phase started. */
    long start;

    /**
     * Entry point: builds the tables and runs all three phases.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        BenchmarkPushModel b = new BenchmarkPushModel();
        b.go();
    }

    /**
     * Generates both tables, then times the push, pull and hand-rolled
     * evaluations in that order, printing one result line per phase.
     */
    private void go() {
        List<Row> leftRows = makeRows();
        List<Row> rightRows = makeRows();

        start("push");
        rowsOut = 0;
        for (int run = 0; run < RUN_COUNT; run++) {
            PushOp pushOp = new PushHashJoin(new PushFilter(new PushScan(leftRows)), new PushScan(rightRows));
            pushOp.exec(row -> rowsOut++);
        }
        end("push rows per run: " + ((double) rowsOut / RUN_COUNT), RUN_COUNT);

        start("pull");
        rowsOut = 0;
        for (int run = 0; run < RUN_COUNT; run++) {
            PullOp pullOp = new PullHashJoin(new PullFilter(new PullScan(leftRows)), new PullScan(rightRows));
            pullOp.open();
            while (pullOp.next() != null) {
                rowsOut++;
            }
        }
        end("pull rows per run: " + ((double) rowsOut / RUN_COUNT), RUN_COUNT);

        // Hand-rolled code: the same filter + hash join written as two plain loops.
        start("rolled by hand");
        rowsOut = 0;
        for (int run = 0; run < RUN_COUNT; run++) {
            HashMap<String, Row> map = new HashMap<>();
            for (Row row : leftRows) {
                if (row.b > INT_FIELD_PREDICATE_THRESHOLD) {
                    map.put(row.a, row);
                }
            }
            for (Row row : rightRows) {
                if (map.get(row.a) != null) {
                    rowsOut++;
                }
            }
        }
        end("hand-rolled rows per run: " + ((double) rowsOut / RUN_COUNT), RUN_COUNT);
    }

    /**
     * Builds one table of {@link #ROW_COUNT} rows using {@link #rand}.
     *
     * @return a new list whose rows have a random {@link #LEN_STRING_FIELD}-letter
     *         lower-case {@code a} and a random {@code b} in
     *         {@code [0, MAX_VAL_INT_FIELD)}
     */
    private List<Row> makeRows() {
        List<Row> rows = new ArrayList<>(ROW_COUNT);
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < ROW_COUNT; i++) {
            Row row = new Row();
            sb.setLength(0);
            for (int j = 0; j < LEN_STRING_FIELD; j++) {
                sb.append((char) (rand.nextInt(26) + 'a'));
            }
            row.a = sb.toString();
            row.b = rand.nextInt(MAX_VAL_INT_FIELD);
            rows.add(row);
        }
        return rows;
    }

    /** One tuple of either table: a string join key and an integer value. */
    static class Row {
        String a;
        int b;

        @Override
        public String toString() {
            return a + " " + b;
        }
    }

    /////////////////////////////////////////////////////
    // Pull Ops

    /** Pull-style operator: the consumer asks for rows one at a time. */
    interface PullOp {
        /** Prepares the operator; must be called before {@link #next()}. */
        void open();

        /** @return the next row, or {@code null} when the operator is exhausted */
        Row next();
    }

    /** Pull operator that yields the rows of a list in iteration order. */
    static class PullScan implements PullOp {

        Iterator<Row> it;

        PullScan(List<Row> rows) {
            this.it = rows.iterator();
        }

        @Override
        public void open() {
        }

        @Override
        public Row next() {
            if (it.hasNext()) {
                return it.next();
            }
            return null;
        }
    }

    /** Pull operator that passes on only child rows with {@code b > INT_FIELD_PREDICATE_THRESHOLD}. */
    static class PullFilter implements PullOp {

        PullOp child;

        public PullFilter(PullOp child) {
            this.child = child;
        }

        @Override
        public void open() {
            child.open();
        }

        @Override
        public Row next() {
            while (true) {
                Row row = child.next();
                if (row == null) {
                    return null;
                }
                if (row.b > INT_FIELD_PREDICATE_THRESHOLD) {
                    return row;
                }
            }
        }
    }

    /**
     * Pull operator that hashes the whole left input on {@code a} during
     * {@link #open()} and then yields every right row whose {@code a} is
     * present in that hash table.
     */
    static class PullHashJoin implements PullOp {
        PullOp left;
        PullOp right;
        HashMap<String, Row> map = new HashMap<>();

        public PullHashJoin(PullOp left, PullOp right) {
            this.left = left;
            this.right = right;
        }

        @Override
        public void open() {
            left.open();
            right.open();

            // Build the hash table from the left input; a later row with the
            // same key replaces the earlier one.
            while (true) {
                Row row = left.next();
                if (row == null) {
                    break;
                }
                map.put(row.a, row);
            }
        }

        @Override
        public Row next() {
            while (true) {
                Row row = right.next();
                if (row == null) {
                    return null;
                }
                if (map.get(row.a) != null) {
                    return row;
                }
            }
        }
    }

    /////////////////////////////////////////////////////
    // Push Ops

    /** Receiver of rows produced by a {@link PushOp}. */
    interface PushCallback {
        void accept(Row row);
    }

    /** Push-style operator: it drives the loop and hands each row to a callback. */
    interface PushOp {
        /**
         * Produces all rows of this operator.
         *
         * @param cb invoked once for every produced row
         */
        void exec(PushCallback cb);
    }

    /** Push operator that hands every row of a list to the callback. */
    static class PushScan implements PushOp {

        List<Row> rows;

        PushScan(List<Row> rows) {
            this.rows = rows;
        }

        @Override
        public void exec(PushCallback cb) {
            for (Row row : rows) {
                cb.accept(row);
            }
        }
    }

    /** Push operator that forwards only child rows with {@code b > INT_FIELD_PREDICATE_THRESHOLD}. */
    static class PushFilter implements PushOp {

        PushOp child;

        public PushFilter(PushOp child) {
            this.child = child;
        }

        @Override
        public void exec(PushCallback cb) {
            child.exec(row -> {
                if (row.b > INT_FIELD_PREDICATE_THRESHOLD) {
                    cb.accept(row);
                }
            });
        }
    }

    /**
     * Push operator that first drains the left input into a hash table keyed
     * on {@code a}, then forwards every right row whose {@code a} is present
     * in that table.
     */
    static class PushHashJoin implements PushOp {
        PushOp left;
        PushOp right;

        public PushHashJoin(PushOp left, PushOp right) {
            this.left = left;
            this.right = right;
        }

        @Override
        public void exec(PushCallback cb) {
            // Build the hash table from the left input; a later row with the
            // same key replaces the earlier one.
            HashMap<String, Row> map = new HashMap<>();
            left.exec(row -> map.put(row.a, row));

            right.exec(row -> {
                if (map.get(row.a) != null) {
                    cb.accept(row);
                }
            });
        }
    }

    /**
     * Marks the beginning of a timed phase.
     *
     * @param label name of the phase, stored in {@link #label}
     */
    public void start(String label) {
        this.label = label;
        // nanoTime is monotonic, so the measured interval is not affected by
        // wall-clock adjustments.
        this.start = System.nanoTime();
    }

    /**
     * Prints the time elapsed since the last {@link #start(String)} call.
     *
     * @param label text printed in front of the timing figures
     * @param count number of runs the elapsed time is averaged over
     */
    public void end(String label, long count) {
        long elapsed = (System.nanoTime() - start) / 1_000_000L;
        System.out.println(label + " elapsed: " + elapsed + " millis. " + (((double) elapsed) / count) + " millis per run.");
    }

}