and the physical plan is as follows. I guess that the schema field "COUNT(DISTINCT demo.name)[count distinct]" of the second AggregateExec, which is nullable, causes the error.
ProjectionExec {
expr: [
(
Column {
name: "t",
index: 0,
},
"t",
),
(
Column {
name: "COUNT(DISTINCT demo.name)",
index: 1,
},
"COUNT(DISTINCT demo.name)",
),
],
schema: Schema {
fields: [
Field {
name: "t",
data_type: Timestamp(
Millisecond,
None,
),
nullable: false,
dict_id: 0,
dict_is_ordered: false,
metadata: None,
},
Field {
name: "COUNT(DISTINCT demo.name)",
data_type: Int64,
nullable: true,
dict_id: 0,
dict_is_ordered: false,
metadata: None,
},
],
metadata: {},
},
input: AggregateExec {
mode: FinalPartitioned,
group_by: PhysicalGroupBy {
expr: [
(
Column {
name: "t",
index: 0,
},
"t",
),
],
null_expr: [],
groups: [
[
false,
],
],
},
aggr_expr: [
DistinctCount {
name: "COUNT(DISTINCT demo.name)",
data_type: Int64,
state_data_types: [
Utf8,
],
exprs: [
Column {
name: "name",
index: 1,
},
],
},
],
input: CoalesceBatchesExec {
input: RepartitionExec {
input: AggregateExec {
mode: Partial,
group_by: PhysicalGroupBy {
expr: [
(
Column {
name: "t",
index: 0,
},
"t",
),
],
null_expr: [],
groups: [
[
false,
],
],
},
aggr_expr: [
DistinctCount {
name: "COUNT(DISTINCT demo.name)",
data_type: Int64,
state_data_types: [
Utf8,
],
exprs: [
Column {
name: "name",
index: 1,
},
],
},
],
input: ScanTable {
projected_schema: ProjectedSchema {
original_schema: Schema {
num_key_columns: 2,
timestamp_index: 0,
tsid_index: Some(
1,
),
enable_tsid_primary_key: true,
column_schemas: ColumnSchemas {
columns: [
ColumnSchema {
id: 1,
name: "t",
data_type: Timestamp,
is_nullable: false,
is_tag: false,
comment: "",
escaped_name: "t",
default_value: None,
},
ColumnSchema {
id: 2,
name: "tsid",
data_type: UInt64,
is_nullable: false,
is_tag: false,
comment: "",
escaped_name: "tsid",
default_value: None,
},
ColumnSchema {
id: 3,
name: "name",
data_type: String,
is_nullable: true,
is_tag: true,
comment: "",
escaped_name: "name",
default_value: None,
},
ColumnSchema {
id: 4,
name: "value",
data_type: Double,
is_nullable: false,
is_tag: false,
comment: "",
escaped_name: "value",
default_value: None,
},
],
},
version: 1,
},
projection: Some(
[
0,
2,
],
),
},
table: "demo",
read_order: None,
read_parallelism: 8,
predicate: Predicate {
exprs: [],
time_range: TimeRange {
inclusive_start: Timestamp(
-9223372036854775808,
),
exclusive_end: Timestamp(
9223372036854775807,
),
},
},
},
schema: Schema {
fields: [
Field {
name: "t",
data_type: Timestamp(
Millisecond,
None,
),
nullable: false,
dict_id: 0,
dict_is_ordered: false,
metadata: None,
},
Field {
name: "COUNT(DISTINCT demo.name)[count distinct]",
data_type: List(
Field {
name: "item",
data_type: Utf8,
nullable: true,
dict_id: 0,
dict_is_ordered: false,
metadata: None,
},
),
nullable: false,
dict_id: 0,
dict_is_ordered: false,
metadata: None,
},
],
metadata: {},
},
input_schema: Schema {
fields: [
Field {
name: "t",
data_type: Timestamp(
Millisecond,
None,
),
nullable: false,
dict_id: 0,
dict_is_ordered: false,
metadata: Some(
{
"field::comment": "",
"field::id": "1",
"field::is_tag": "false",
},
),
},
Field {
name: "name",
data_type: Utf8,
nullable: true,
dict_id: 0,
dict_is_ordered: false,
metadata: Some(
{
"field::comment": "",
"field::id": "3",
"field::is_tag": "true",
},
),
},
],
metadata: {
"schema:num_key_columns": "2",
"schema::enable_tsid_primary_key": "true",
"schema::timestamp_index": "0",
"schema::version": "1",
},
},
metrics: ExecutionPlanMetricsSet {
inner: Mutex {
data: MetricsSet {
metrics: [],
},
},
},
},
partitioning: Hash(
[
Column {
name: "t",
index: 0,
},
],
8,
),
state: Mutex {
data: RepartitionExecState {
channels: {},
abort_helper: AbortOnDropMany(
[],
),
},
},
metrics: ExecutionPlanMetricsSet {
inner: Mutex {
data: MetricsSet {
metrics: [],
},
},
},
},
target_batch_size: 4096,
metrics: ExecutionPlanMetricsSet {
inner: Mutex {
data: MetricsSet {
metrics: [],
},
},
},
},
schema: Schema {
fields: [
Field {
name: "t",
data_type: Timestamp(
Millisecond,
None,
),
nullable: false,
dict_id: 0,
dict_is_ordered: false,
metadata: None,
},
Field {
name: "COUNT(DISTINCT demo.name)",
data_type: Int64,
nullable: true,
dict_id: 0,
dict_is_ordered: false,
metadata: None,
},
],
metadata: {},
},
input_schema: Schema {
fields: [
Field {
name: "t",
data_type: Timestamp(
Millisecond,
None,
),
nullable: false,
dict_id: 0,
dict_is_ordered: false,
metadata: Some(
{
"field::comment": "",
"field::id": "1",
"field::is_tag": "false",
},
),
},
Field {
name: "name",
data_type: Utf8,
nullable: true,
dict_id: 0,
dict_is_ordered: false,
metadata: Some(
{
"field::comment": "",
"field::id": "3",
"field::is_tag": "true",
},
),
},
],
metadata: {
"schema:num_key_columns": "2",
"schema::enable_tsid_primary_key": "true",
"schema::timestamp_index": "0",
"schema::version": "1",
},
},
metrics: ExecutionPlanMetricsSet {
inner: Mutex {
data: MetricsSet {
metrics: [],
},
},
},
},
metrics: ExecutionPlanMetricsSet {
inner: Mutex {
data: MetricsSet {
metrics: [],
},
},
},
}
Describe the bug
DataFusion panics when I run the query
`select app, count(distinct name) from demo group by app`. Here is the stack trace:
To Reproduce
Expected behavior
The query should return a result instead of panicking.
Additional context
I found this bug while using CeresDB (apache/horaedb#302).
I also found that if partition_num is set to more than 1, the error is as above; if partition_num is set to 1, the error is the one reported in #1623.
After digging into the code, I found that the logical plan is:
And the physical plan is as follows. I guess that the schema field "COUNT(DISTINCT demo.name)[count distinct]" of the second AggregateExec, which is nullable, causes the error.