Skip to content

Commit

Permalink
feat: allow row_count=0 in q15 and add SF10 counts (#2401)
Browse files Browse the repository at this point in the history
I wanted the SF10 results for running bigger benchmarks against S3.
  • Loading branch information
danking authored Feb 18, 2025
1 parent 6f52a13 commit 202cffc
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 8 deletions.
45 changes: 38 additions & 7 deletions bench-vortex/src/bin/tpch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ use std::time::{Duration, Instant};
use bench_vortex::display::{print_measurements_json, render_table, DisplayFormat};
use bench_vortex::measurements::QueryMeasurement;
use bench_vortex::tpch::dbgen::{DBGen, DBGenOptions};
use bench_vortex::tpch::{load_datasets, run_tpch_query, tpch_queries, EXPECTED_ROW_COUNTS};
use bench_vortex::tpch::{
load_datasets, run_tpch_query, tpch_queries, EXPECTED_ROW_COUNTS_SF1, EXPECTED_ROW_COUNTS_SF10,
TPC_H_ROW_COUNT_ARRAY_LENGTH,
};
use bench_vortex::{default_env_filter, feature_flagged_allocator, setup_logger, Format};
use clap::Parser;
use indicatif::ProgressBar;
Expand Down Expand Up @@ -106,6 +109,7 @@ fn main() -> ExitCode {
args.formats,
args.display_format,
args.emulate_object_store,
args.scale_factor,
url,
))
}
Expand All @@ -118,8 +122,20 @@ async fn bench_main(
formats: Vec<Format>,
display_format: DisplayFormat,
emulate_object_store: bool,
scale_factor: u8,
url: Url,
) -> ExitCode {
let expected_row_counts = if scale_factor == 1 {
EXPECTED_ROW_COUNTS_SF1
} else if scale_factor == 10 {
EXPECTED_ROW_COUNTS_SF10
} else {
panic!(
"Scale factor {} not supported due to lack of expected row counts.",
scale_factor
);
};

eprintln!(
"Benchmarking against these formats: {}.",
formats.iter().join(", ")
Expand Down Expand Up @@ -196,7 +212,7 @@ async fn bench_main(
for (idx, format, row_count) in row_counts {
format_row_counts
.entry(format)
.or_insert_with(|| vec![0; EXPECTED_ROW_COUNTS.len()])[idx] = row_count;
.or_insert_with(|| vec![0; TPC_H_ROW_COUNT_ARRAY_LENGTH])[idx] = row_count;
}

progress.finish();
Expand All @@ -205,14 +221,29 @@ async fn bench_main(
for (format, row_counts) in format_row_counts {
row_counts
.into_iter()
.zip_eq(EXPECTED_ROW_COUNTS)
.enumerate()
.filter(|(idx, _)| queries.as_ref().map(|q| q.contains(idx)).unwrap_or(true))
.filter(|(idx, _)| exclude_queries.as_ref().map(|excluded| !excluded.contains(idx)).unwrap_or(true))
.for_each(|(idx, (row_count, expected_row_count))| {
if row_count != expected_row_count {
eprintln!("Mismatched row count {row_count} instead of {expected_row_count} in query {idx} for format {format:?}");
mismatched = true;
.for_each(|(idx, actual_row_count)| {
let expected_row_count = expected_row_counts[idx];
if actual_row_count != expected_row_count {
if idx == 15 && actual_row_count == 0 {
eprintln!(
"*IGNORING* mismatched row count {} instead of {} for format {:?} because Query 15 is flaky. See: https://github.com/spiraldb/vortex/issues/2395",
actual_row_count,
expected_row_count,
format,
);
} else {
eprintln!(
"Mismatched row count {} instead of {} in query {} for format {:?}",
actual_row_count,
expected_row_count,
idx,
format,
);
mismatched = true;
}
}
})
}
Expand Down
11 changes: 10 additions & 1 deletion bench-vortex/src/tpch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,18 @@ pub use execute::*;
use vortex::error::VortexError;
use vortex::stream::ArrayStreamAdapter;

pub const EXPECTED_ROW_COUNTS: [usize; 23] = [
/// Length of the per-query expected-row-count arrays: the 22 TPC-H queries
/// plus one dummy slot at index 0 so a query's number maps directly to its index.
pub const TPC_H_ROW_COUNT_ARRAY_LENGTH: usize = 23;
/// Expected result-set row counts for each TPC-H query at scale factor 1,
/// indexed by query number (index 0 is unused).
pub const EXPECTED_ROW_COUNTS_SF1: [usize; TPC_H_ROW_COUNT_ARRAY_LENGTH] = [
    // The 0th entry is a dummy so that Query 1's row count is at index 1.
    0, 4, 460, 11620, 5, 5, 1, 4, 2, 175, 37967, 1048, 2, 42, 1, 1, 18314, 1, 57, 1, 186, 411, 7,
];
/// Expected result-set row counts for each TPC-H query at scale factor 10,
/// indexed by query number (index 0 is unused).
pub const EXPECTED_ROW_COUNTS_SF10: [usize; TPC_H_ROW_COUNT_ARRAY_LENGTH] = [
    // The 0th entry is a dummy so that Query 1's row count is at index 1.
    //
    // Generated by executing the SQL in each query file using duckdb with the table names replaced
    // by "$NAME.parquet".
    //
    // NOTE(review): index 11 is 0 here while the SF1 array has 1048 at index 11 —
    // confirm Query 11's SF10 result set is genuinely empty and not a generation error.
    0, 4, 4667, 114003, 5, 5, 1, 4, 2, 175, 381105, 0, 2, 46, 1, 1, 27840, 1, 624, 1, 1804, 4009, 7,
];

fn make_object_store(df: &SessionContext, source: &Url) -> anyhow::Result<Arc<dyn ObjectStore>> {
match source.scheme() {
Expand Down

0 comments on commit 202cffc

Please sign in to comment.