diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 8a59cae019671..fab054eef0a04 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -320,6 +320,20 @@ jobs: - name: Run datafusion-common tests run: cargo test -p datafusion-common --features=pyarrow + vendor: + name: Verify Vendored Code + runs-on: ubuntu-latest + container: + image: amd64/rust + steps: + - uses: actions/checkout@v3 + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + - name: Run gen + run: ./regen.sh + working-directory: ./datafusion/proto + - name: Verify workspace clean (if this fails, run ./datafusion/proto/regen.sh and check in results) + run: git diff --exit-code check-fmt: name: Check cargo fmt diff --git a/Cargo.toml b/Cargo.toml index e53865a368817..728a82468985a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ members = [ "datafusion/optimizer", "datafusion/physical-expr", "datafusion/proto", + "datafusion/proto/gen", "datafusion/row", "datafusion/sql", "datafusion/substrait", diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index 4129980bf7033..f174aa90d5943 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -54,8 +54,3 @@ serde_json = { version = "1.0", optional = true } [dev-dependencies] doc-comment = "0.3" tokio = "1.18" - -[build-dependencies] -# Pin these dependencies so that the generated output is deterministic -pbjson-build = { version = "=0.5.1" } -prost-build = { version = "=0.11.7" } diff --git a/datafusion/proto/README.md b/datafusion/proto/README.md index 4b9afd3c3405b..015ded8b0d706 100644 --- a/datafusion/proto/README.md +++ b/datafusion/proto/README.md @@ -94,4 +94,15 @@ async fn main() -> Result<()> { ``` +## Generated Code + +The prost/tonic code can be generated by running, which in turn invokes the Rust binary located in [gen](./gen) + +This is necessary after modifying the protobuf definitions or altering the dependencies of [gen](./gen), and requires a +valid installation of [protoc](https://github.com/protocolbuffers/protobuf#protocol-compiler-installation). + +```bash +./regen.sh +``` + [df]: https://crates.io/crates/datafusion diff --git a/datafusion/proto/gen/Cargo.toml b/datafusion/proto/gen/Cargo.toml new file mode 100644 index 0000000000000..a792eb790423d --- /dev/null +++ b/datafusion/proto/gen/Cargo.toml @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "gen" +description = "Code generation for proto" +version = "0.1.0" +edition = "2021" +rust-version = "1.62" +authors = ["Apache Arrow "] +homepage = "https://github.com/apache/arrow-datafusion" +repository = "https://github.com/apache/arrow-datafusion" +license = "Apache-2.0" +publish = false + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +# Pin these dependencies so that the generated output is deterministic +pbjson-build = { version = "=0.5.1" } +prost-build = { version = "=0.11.7" } diff --git a/datafusion/proto/build.rs b/datafusion/proto/gen/src/main.rs similarity index 68% rename from datafusion/proto/build.rs rename to datafusion/proto/gen/src/main.rs index 7d3582d2b2243..a8936a2cb9cc8 100644 --- a/datafusion/proto/build.rs +++ b/datafusion/proto/gen/src/main.rs @@ -15,42 +15,31 @@ // specific language governing permissions and limitations // under the License. -use std::path::{Path, PathBuf}; +use std::path::Path; type Error = Box; type Result = std::result::Result; fn main() -> Result<(), String> { - // for use in docker build where file changes can be wonky - println!("cargo:rerun-if-env-changed=FORCE_REBUILD"); + let proto_dir = Path::new("proto"); + let proto_path = Path::new("proto/datafusion.proto"); - // We don't include the proto files in releases so that downstreams - // do not need to have PROTOC included - if Path::new("proto/datafusion.proto").exists() { - println!("cargo:rerun-if-changed=proto/datafusion.proto"); - build()? - } - - Ok(()) -} - -fn build() -> Result<(), String> { - let out: PathBuf = std::env::var("OUT_DIR") - .expect("Cannot find OUT_DIR environment variable") - .into(); - let descriptor_path = out.join("proto_descriptor.bin"); + // proto definitions has to be there + let descriptor_path = proto_dir.join("proto_descriptor.bin"); prost_build::Config::new() .file_descriptor_set_path(&descriptor_path) + .out_dir("src") .compile_well_known_types() .extern_path(".google.protobuf", "::pbjson_types") - .compile_protos(&["proto/datafusion.proto"], &["proto"]) + .compile_protos(&[proto_path], &["proto"]) .map_err(|e| format!("protobuf compilation failed: {e}"))?; let descriptor_set = std::fs::read(&descriptor_path) .unwrap_or_else(|e| panic!("Cannot read {:?}: {}", &descriptor_path, e)); pbjson_build::Builder::new() + .out_dir("src") .register_descriptors(&descriptor_set) .unwrap_or_else(|e| { panic!("Cannot register descriptors {:?}: {}", &descriptor_set, e) @@ -58,8 +47,8 @@ fn build() -> Result<(), String> { .build(&[".datafusion"]) .map_err(|e| format!("pbjson compilation failed: {e}"))?; - let prost = out.join("datafusion.rs"); - let pbjson = out.join("datafusion.serde.rs"); + let prost = Path::new("src/datafusion.rs"); + let pbjson = Path::new("src/datafusion.serde.rs"); std::fs::copy(prost, "src/generated/prost.rs").unwrap(); std::fs::copy(pbjson, "src/generated/pbjson.rs").unwrap(); diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 76ec5b0017084..f3d23ba0eef0e 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -71,7 +71,7 @@ message LogicalPlanNode { DistinctNode distinct = 23; ViewTableScanNode view_scan = 24; CustomTableScanNode custom_scan = 25; - PrepareNode prepare = 26; + PrepareNode prepare = 260; } } diff --git a/datafusion/proto/regen.sh b/datafusion/proto/regen.sh new file mode 100755 index 0000000000000..4b7ad4d58533e --- /dev/null +++ b/datafusion/proto/regen.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +cd $SCRIPT_DIR && cargo run --manifest-path gen/Cargo.toml \ No newline at end of file diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index e5b4534f60a02..730b966a5e742 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -38,7 +38,7 @@ pub struct DfSchema { pub struct LogicalPlanNode { #[prost( oneof = "logical_plan_node::LogicalPlanType", - tags = "1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26" + tags = "1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 260" )] pub logical_plan_type: ::core::option::Option, } @@ -95,7 +95,7 @@ pub mod logical_plan_node { ViewScan(::prost::alloc::boxed::Box), #[prost(message, tag = "25")] CustomScan(super::CustomTableScanNode), - #[prost(message, tag = "26")] + #[prost(message, tag = "260")] Prepare(::prost::alloc::boxed::Box), } }