diff --git a/docs/src/examples/index.md b/docs/src/examples/index.md index 0aceb823..bb041871 100644 --- a/docs/src/examples/index.md +++ b/docs/src/examples/index.md @@ -963,7 +963,7 @@ Now we have all the components to construct the final query: ORDER BY "condition_occurrence_1"."condition_occurrence_id" ) AS "condition_occurrence_2" WHERE (EXISTS ( - SELECT NULL AS "_" + SELECT "visit_occurrence_1"."visit_occurrence_id" FROM "visit_occurrence" AS "visit_occurrence_1" JOIN "base_4" AS "base_6" ON ("visit_occurrence_1"."visit_concept_id" = "base_6"."concept_id") WHERE diff --git a/docs/src/guide/index.md b/docs/src/guide/index.md index 6a918797..63d59b8f 100644 --- a/docs/src/guide/index.md +++ b/docs/src/guide/index.md @@ -1113,7 +1113,7 @@ expression evaluated in the context of the outer query. "visit_occurrence_1"."visit_source_concept_id" FROM "visit_occurrence" AS "visit_occurrence_1" WHERE (EXISTS ( - SELECT NULL AS "_" + SELECT "condition_occurrence_1"."condition_occurrence_id" FROM "condition_occurrence" AS "condition_occurrence_1" WHERE ("condition_occurrence_1"."person_id" = "visit_occurrence_1"."person_id") AND diff --git a/docs/src/test/nodes.md b/docs/src/test/nodes.md index 91c2b410..32a44cf2 100644 --- a/docs/src/test/nodes.md +++ b/docs/src/test/nodes.md @@ -20,7 +20,7 @@ We start with specifying the database model. SQLTable(:person, columns = [:person_id, :gender_concept_id, :year_of_birth, :month_of_birth, :day_of_birth, :birth_datetime, :location_id]) const visit_occurrence = - SQLTable(:visit_occurrence, columns = [:visit_occurrence_id, :person_id, :visit_start_date, :visit_end_date]) + SQLTable(:visit_occurrence, columns = [:visit_occurrence_id, :person_id, :visit_concept_id, :visit_start_date, :visit_end_date]) const measurement = SQLTable(:measurement, columns = [:measurement_id, :person_id, :measurement_concept_id, :measurement_date]) @@ -810,163 +810,6 @@ column. =# -## Variables - -A query variable is created with the `Var` constructor. 
- - e = Var(:YEAR) - #-> Var.YEAR - -Alternatively, use shorthand notation. - - Var.YEAR - #-> Var.YEAR - - Var."YEAR" - #-> Var.YEAR - - Var[:YEAR] - #-> Var.YEAR - - Var["YEAR"] - #-> Var.YEAR - -A variable could be created with `@funsql` notation. - - @funsql :YEAR - #-> Var.YEAR - -Unbound query variables are serialized as query parameters. - - q = From(person) |> - Where(Get.year_of_birth .> Var.YEAR) - - sql = render(q) - - print(sql) - #=> - SELECT - "person_1"."person_id", - ⋮ - "person_1"."location_id" - FROM "person" AS "person_1" - WHERE ("person_1"."year_of_birth" > :YEAR) - =# - - sql.vars - #-> [:YEAR] - -Query variables could be bound using the `Bind` constructor. - - q0(person_id) = - From(visit_occurrence) |> - Where(Get.person_id .== Var.PERSON_ID) |> - Bind(:PERSON_ID => person_id) - - q0(1) - #-> (…) |> Bind(…) - - display(q0(1)) - #=> - let visit_occurrence = SQLTable(:visit_occurrence, …), - q1 = From(visit_occurrence), - q2 = q1 |> Where(Fun."="(Get.person_id, Var.PERSON_ID)) - q2 |> Bind(1 |> As(:PERSON_ID)) - end - =# - - print(render(q0(1))) - #=> - SELECT - "visit_occurrence_1"."visit_occurrence_id", - "visit_occurrence_1"."person_id", - "visit_occurrence_1"."visit_start_date", - "visit_occurrence_1"."visit_end_date" - FROM "visit_occurrence" AS "visit_occurrence_1" - WHERE ("visit_occurrence_1"."person_id" = 1) - =# - -A `Bind` node can be created with `@funsql` notation. - - q = @funsql begin - from(visit_occurrence) - filter(person_id == :PERSON_ID) - bind(:PERSON_ID => person_id) - end - - display(q) - #=> - let q1 = From(:visit_occurrence), - q2 = q1 |> Where(Fun."="(Get.person_id, Var.PERSON_ID)) - q2 |> Bind(Get.person_id |> As(:PERSON_ID)) - end - =# - -`Bind` lets us create correlated subqueries. 
- - q = From(person) |> - Where(Fun.exists(q0(Get.person_id))) - - print(render(q)) - #=> - SELECT - "person_1"."person_id", - ⋮ - "person_1"."location_id" - FROM "person" AS "person_1" - WHERE (EXISTS ( - SELECT NULL AS "_" - FROM "visit_occurrence" AS "visit_occurrence_1" - WHERE ("visit_occurrence_1"."person_id" = "person_1"."person_id") - )) - =# - -When an argument to `Bind` is an aggregate, it must be evaluated in a nested -subquery. - - q0(person_id, date) = - From(observation) |> - Where(Fun.and(Get.person_id .== Var.PERSON_ID, - Get.observation_date .>= Var.DATE)) |> - Bind(:PERSON_ID => person_id, :DATE => date) - - q = From(visit_occurrence) |> - Group(Get.person_id) |> - Where(Fun.exists(q0(Get.person_id, Agg.max(Get.visit_start_date)))) - - print(render(q)) - #=> - SELECT "visit_occurrence_2"."person_id" - FROM ( - SELECT - "visit_occurrence_1"."person_id", - max("visit_occurrence_1"."visit_start_date") AS "max" - FROM "visit_occurrence" AS "visit_occurrence_1" - GROUP BY "visit_occurrence_1"."person_id" - ) AS "visit_occurrence_2" - WHERE (EXISTS ( - SELECT NULL AS "_" - FROM "observation" AS "observation_1" - WHERE - ("observation_1"."person_id" = "visit_occurrence_2"."person_id") AND - ("observation_1"."observation_date" >= "visit_occurrence_2"."max") - )) - =# - -An empty `Bind` can be created. - - Bind(args = []) - #-> Bind(args = []) - -`Bind` requires that all variables have a unique name. - - Bind(:PERSON_ID => 1, :PERSON_ID => 2) - #=> - ERROR: FunSQL.DuplicateLabelError: `PERSON_ID` is used more than once in: - Bind(1 |> As(:PERSON_ID), 2 |> As(:PERSON_ID)) - =# - - ## Functions and Operators A function or an operator invocation is created with the `Fun` constructor. @@ -1193,7 +1036,7 @@ A function invocation may include a nested query. 
print(render(q)) #=> SELECT (EXISTS ( - SELECT NULL AS "_" + SELECT "person_1"."person_id" FROM "person" AS "person_1" WHERE ("person_1"."year_of_birth" > 1950) )) AS "exists" @@ -1347,6 +1190,281 @@ FunSQL can simplify logical expressions. =# +## Scalar Subqueries + +In SQL, a scalar expression may contain a subquery. This subquery should select +one column and should produce zero or one row (unless used as an argument of +`IN` or `EXISTS`). Such subqueries can be constructed with FunSQL. + + p = From(concept) |> + Where(Fun.and(Get.vocabulary_id .== "Visit", Get.concept_code .== "IP")) |> + Select(Get.concept_name) + + print(render(p)) + #=> + SELECT "concept_1"."concept_name" + FROM "concept" AS "concept_1" + WHERE + ("concept_1"."vocabulary_id" = 'Visit') AND + ("concept_1"."concept_code" = 'IP') + =# + + q = Select(p) + + print(render(q)) + #=> + SELECT ( + SELECT "concept_1"."concept_name" + FROM "concept" AS "concept_1" + WHERE + ("concept_1"."vocabulary_id" = 'Visit') AND + ("concept_1"."concept_code" = 'IP') + ) AS "concept" + =# + +When a subquery is used in a scalar context, FunSQL automatically selects +its first column, which often makes an explicit `Select` unnecessary. 
+ + p = From(concept) |> + Where(Fun.and(Get.vocabulary_id .== "Visit", Get.concept_code .== "IP")) + + print(render(p)) + #=> + SELECT + "concept_1"."concept_id", + "concept_1"."vocabulary_id", + "concept_1"."concept_code", + "concept_1"."concept_name" + FROM "concept" AS "concept_1" + WHERE + ("concept_1"."vocabulary_id" = 'Visit') AND + ("concept_1"."concept_code" = 'IP') + =# + + q = From(visit_occurrence) |> + Where(Get.visit_concept_id .== p) + + print(render(q)) + #=> + SELECT + "visit_occurrence_1"."visit_occurrence_id", + "visit_occurrence_1"."person_id", + "visit_occurrence_1"."visit_concept_id", + "visit_occurrence_1"."visit_start_date", + "visit_occurrence_1"."visit_end_date" + FROM "visit_occurrence" AS "visit_occurrence_1" + WHERE ("visit_occurrence_1"."visit_concept_id" = ( + SELECT "concept_1"."concept_id" + FROM "concept" AS "concept_1" + WHERE + ("concept_1"."vocabulary_id" = 'Visit') AND + ("concept_1"."concept_code" = 'IP') + )) + =# + +If the subquery explicitly selects more than one column, an extra `SELECT` +clause is added. + + p = From(concept) |> + Where(Fun.and(Get.vocabulary_id .== "Visit", Get.concept_code .== "IP")) |> + Select(Get.concept_id, Get.concept_name) + + q = Select(p) + + print(render(q)) + #=> + SELECT ( + SELECT "concept_2"."concept_id" + FROM ( + SELECT + "concept_1"."concept_id", + "concept_1"."concept_name" + FROM "concept" AS "concept_1" + WHERE + ("concept_1"."vocabulary_id" = 'Visit') AND + ("concept_1"."concept_code" = 'IP') + ) AS "concept_2" + ) AS "concept" + =# + +If no columns are selected, a column containing `NULL` is added. 
+ + p = From(concept) |> + Where(Fun.and(Get.vocabulary_id .== "Visit", Get.concept_code .== "IP")) |> + Select(args = []) + + q = Select(p) + + print(render(q)) + #=> + SELECT ( + SELECT NULL AS "_" + FROM "concept" AS "concept_1" + WHERE + ("concept_1"."vocabulary_id" = 'Visit') AND + ("concept_1"."concept_code" = 'IP') + ) AS "concept" + =# + + +## Variables + +A query variable is created with the `Var` constructor. + + e = Var(:YEAR) + #-> Var.YEAR + +Alternatively, use shorthand notation. + + Var.YEAR + #-> Var.YEAR + + Var."YEAR" + #-> Var.YEAR + + Var[:YEAR] + #-> Var.YEAR + + Var["YEAR"] + #-> Var.YEAR + +A variable can be created with `@funsql` notation. + + @funsql :YEAR + #-> Var.YEAR + +Unbound query variables are serialized as query parameters. + + q = From(person) |> + Where(Get.year_of_birth .> Var.YEAR) + + sql = render(q) + + print(sql) + #=> + SELECT + "person_1"."person_id", + ⋮ + "person_1"."location_id" + FROM "person" AS "person_1" + WHERE ("person_1"."year_of_birth" > :YEAR) + =# + + sql.vars + #-> [:YEAR] + +Query variables can be bound using the `Bind` constructor. + + q0(person_id) = + From(visit_occurrence) |> + Where(Get.person_id .== Var.PERSON_ID) |> + Bind(:PERSON_ID => person_id) + + q0(1) + #-> (…) |> Bind(…) + + display(q0(1)) + #=> + let visit_occurrence = SQLTable(:visit_occurrence, …), + q1 = From(visit_occurrence), + q2 = q1 |> Where(Fun."="(Get.person_id, Var.PERSON_ID)) + q2 |> Bind(1 |> As(:PERSON_ID)) + end + =# + + print(render(q0(1))) + #=> + SELECT + "visit_occurrence_1"."visit_occurrence_id", + "visit_occurrence_1"."person_id", + "visit_occurrence_1"."visit_concept_id", + "visit_occurrence_1"."visit_start_date", + "visit_occurrence_1"."visit_end_date" + FROM "visit_occurrence" AS "visit_occurrence_1" + WHERE ("visit_occurrence_1"."person_id" = 1) + =# + +A `Bind` node can be created with `@funsql` notation. 
+ + q = @funsql begin + from(visit_occurrence) + filter(person_id == :PERSON_ID) + bind(:PERSON_ID => person_id) + end + + display(q) + #=> + let q1 = From(:visit_occurrence), + q2 = q1 |> Where(Fun."="(Get.person_id, Var.PERSON_ID)) + q2 |> Bind(Get.person_id |> As(:PERSON_ID)) + end + =# + +`Bind` lets us create correlated subqueries. + + q = From(person) |> + Where(Fun.exists(q0(Get.person_id))) + + print(render(q)) + #=> + SELECT + "person_1"."person_id", + ⋮ + "person_1"."location_id" + FROM "person" AS "person_1" + WHERE (EXISTS ( + SELECT "visit_occurrence_1"."visit_occurrence_id" + FROM "visit_occurrence" AS "visit_occurrence_1" + WHERE ("visit_occurrence_1"."person_id" = "person_1"."person_id") + )) + =# + +When an argument to `Bind` is an aggregate, it must be evaluated in a nested +subquery. + + q0(person_id, date) = + From(observation) |> + Where(Fun.and(Get.person_id .== Var.PERSON_ID, + Get.observation_date .>= Var.DATE)) |> + Bind(:PERSON_ID => person_id, :DATE => date) + + q = From(visit_occurrence) |> + Group(Get.person_id) |> + Where(Fun.exists(q0(Get.person_id, Agg.max(Get.visit_start_date)))) + + print(render(q)) + #=> + SELECT "visit_occurrence_2"."person_id" + FROM ( + SELECT + "visit_occurrence_1"."person_id", + max("visit_occurrence_1"."visit_start_date") AS "max" + FROM "visit_occurrence" AS "visit_occurrence_1" + GROUP BY "visit_occurrence_1"."person_id" + ) AS "visit_occurrence_2" + WHERE (EXISTS ( + SELECT "observation_1"."observation_id" + FROM "observation" AS "observation_1" + WHERE + ("observation_1"."person_id" = "visit_occurrence_2"."person_id") AND + ("observation_1"."observation_date" >= "visit_occurrence_2"."max") + )) + =# + +An empty `Bind` can be created. + + Bind(args = []) + #-> Bind(args = []) + +`Bind` requires that all variables have a unique name. 
+ + Bind(:PERSON_ID => 1, :PERSON_ID => 2) + #=> + ERROR: FunSQL.DuplicateLabelError: `PERSON_ID` is used more than once in: + Bind(1 |> As(:PERSON_ID), 2 |> As(:PERSON_ID)) + =# + + ## `Append` The `Append` constructor creates a subquery that concatenates the output of @@ -3410,12 +3528,14 @@ values of unmatched rows. SELECT "visit_occurrence_2"."visit_occurrence_id", "visit_occurrence_2"."person_id", + "visit_occurrence_2"."visit_concept_id", "visit_occurrence_2"."visit_start_date", "visit_occurrence_2"."visit_end_date" FROM ( SELECT "visit_occurrence_1"."visit_occurrence_id", "visit_occurrence_1"."person_id", + "visit_occurrence_1"."visit_concept_id", "visit_occurrence_1"."visit_start_date", "visit_occurrence_1"."visit_end_date", (row_number() OVER (ORDER BY "visit_occurrence_1"."visit_start_date")) AS "row_number" diff --git a/src/link.jl b/src/link.jl index 882bbac9..d4d12487 100644 --- a/src/link.jl +++ b/src/link.jl @@ -52,13 +52,6 @@ function dismantle_scalar(ns::Vector{SQLNode}, ctx) SQLNode[dismantle_scalar(n, ctx) for n in ns] end -function dismantle_scalar(n::TabularNode, ctx) - n′ = dismantle(convert(SQLNode, n), ctx) - push!(ctx.defs, n′) - ref = lastindex(ctx.defs) - Isolated(ref) -end - function dismantle_scalar(n::AggregateNode, ctx) args′ = dismantle_scalar(n.args, ctx) filter′ = n.filter !== nothing ? 
dismantle_scalar(n.filter, ctx) : nothing @@ -167,8 +160,17 @@ end dismantle(n::ResolvedNode, ctx) = dismantle(n.over, ctx) -dismantle_scalar(n::ResolvedNode, ctx) = - dismantle_scalar(n.over, ctx) +function dismantle_scalar(n::ResolvedNode, ctx) + t = n.type + if t isa RowType + n′ = dismantle(n.over, ctx) + push!(ctx.defs, n′) + ref = lastindex(ctx.defs) + Isolated(ref, t) + else + dismantle_scalar(n.over, ctx) + end +end function dismantle(n::SelectNode, ctx) over′ = dismantle(n.over, ctx) @@ -539,6 +541,12 @@ function gather!(n::IsolatedNode, ctx) def = ctx.defs[n.idx] !@dissect(def, Linked()) || return refs = SQLNode[] + for (f, ft) in n.type.fields + if ft isa ScalarType + push!(refs, Get(f)) + break + end + end def′ = Linked(refs, over = link(def, ctx, refs)) ctx.defs[n.idx] = def′ nothing diff --git a/src/nodes/bind.jl b/src/nodes/bind.jl index a9f49721..ccc7551f 100644 --- a/src/nodes/bind.jl +++ b/src/nodes/bind.jl @@ -48,7 +48,7 @@ julia> print(render(q, tables = [person, visit_occurrence])) SELECT "person_1"."person_id" FROM "person" AS "person_1" WHERE (EXISTS ( - SELECT NULL AS "_" + SELECT "visit_occurrence_1"."visit_occurrence_id" FROM "visit_occurrence" AS "visit_occurrence_1" WHERE ("visit_occurrence_1"."person_id" = "person_1"."person_id") )) diff --git a/src/nodes/internal.jl b/src/nodes/internal.jl index df0340d4..8a5f1025 100644 --- a/src/nodes/internal.jl +++ b/src/nodes/internal.jl @@ -279,19 +279,20 @@ end # Isolated subquery. mutable struct IsolatedNode <: AbstractSQLNode idx::Int + type::RowType - IsolatedNode(; idx) = - new(idx) + IsolatedNode(; idx, type) = + new(idx, type) end -IsolatedNode(idx) = - IsolatedNode(idx = idx) +IsolatedNode(idx, type) = + IsolatedNode(idx = idx, type = type) Isolated(args...; kws...) = IsolatedNode(args...; kws...) 
|> SQLNode PrettyPrinting.quoteof(n::IsolatedNode, ctx::QuoteContext) = - Expr(:call, nameof(Isolated), n.idx) + Expr(:call, nameof(Isolated), n.idx, quoteof(n.type)) dissect(scr::Symbol, ::typeof(Isolated), pats::Vector{Any}) = dissect(scr, IsolatedNode, pats) diff --git a/src/translate.jl b/src/translate.jl index c53c8b74..cc2f2128 100644 --- a/src/translate.jl +++ b/src/translate.jl @@ -38,6 +38,30 @@ function complete(a::Assemblage) clause end +# Add a SELECT clause aligned with the exported references. +function complete_aligned(a::Assemblage, ctx) + aligned = + length(a.cols) == length(ctx.refs) && + all(a.repl[ref] === name for (name, ref) in zip(keys(a.cols), ctx.refs)) + !aligned || return complete(a) + if !@dissect(a.clause, SELECT() || UNION()) + alias = nothing + clause = a.clause + else + alias = allocate_alias(ctx, a) + clause = FROM(AS(over = a.clause, name = alias)) + end + subs = make_subs(a, alias) + repl = Dict{SQLNode, Symbol}() + cols = OrderedDict{Symbol, SQLClause}() + for ref in ctx.refs + name = repl[ref] = a.repl[ref] + cols[name] = subs[ref] + end + a′ = Assemblage(a.name, clause, repl = repl, cols = cols) + complete(a′) +end + # Build node->clause map assuming that the assemblage will be extended. 
function make_subs(a::Assemblage, ::Nothing)::Dict{SQLNode, SQLClause} subs = Dict{SQLNode, SQLClause}() @@ -186,12 +210,13 @@ end function translate(n::SQLNode) @dissect(n, WithContext(over = Linked(over = n′, refs = refs), catalog = catalog, defs = defs)) || throw(IllFormedError()) ctx = TranslateContext(catalog = catalog, defs = defs) - base = assemble(n′, TranslateContext(ctx, refs = refs)) + ctx′ = TranslateContext(ctx, refs = refs) + base = assemble(n′, ctx′) columns = nothing - if !isempty(base.cols) - columns = [SQLColumn(col) for col in keys(base.cols)] + if !isempty(refs) + columns = [SQLColumn(base.repl[ref]) for ref in refs] end - c = complete(base) + c = complete_aligned(base, ctx′) with_args = SQLClause[] for cte_a in ctx.ctes !cte_a.external || continue @@ -300,13 +325,12 @@ function translate(n::FunctionNode, ctx) end function translate(n::IsolatedNode, ctx) - base = assemble(ctx.defs[n.idx], ctx) - complete(base) + translate(ctx.defs[n.idx], ctx) end function translate(n::LinkedNode, ctx) - base = assemble(n.over, TranslateContext(ctx, refs = n.refs)) - complete(base) + base = assemble(n, ctx) + complete_aligned(base, TranslateContext(ctx, refs = n.refs)) end function translate(n::LiteralNode, ctx)