diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index ee96d6d83f90e..c00c703167723 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -6980,6 +6980,18 @@ ], "sqlState" : "428EK" }, + "THETA_FAMILY_MUST_BE_CONSTANT" : { + "message" : [ + "Invalid call to <function>; the `family` value must be a constant value, but got a non-constant expression." + ], + "sqlState" : "42K0E" + }, + "THETA_INVALID_FAMILY" : { + "message" : [ + "Invalid call to <function>; the `family` parameter must be one of: <validFamilies>, but got: <value>." + ], + "sqlState" : "22546" + }, "THETA_INVALID_INPUT_SKETCH_BUFFER" : { "message" : [ "Invalid call to <function>; only valid Theta sketch buffers are supported as inputs (such as those produced by the `theta_sketch_agg` function)." diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index be2ce20e36206..21fb019c2a8fc 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -4594,12 +4594,12 @@ def hll_union( def theta_sketch_agg( col: "ColumnOrName", lgNomEntries: Optional[Union[int, Column]] = None, + family: Optional[str] = None, ) -> Column: fn = "theta_sketch_agg" - if lgNomEntries is None: - return _invoke_function_over_columns(fn, col) - else: - return _invoke_function_over_columns(fn, col, lit(lgNomEntries)) + _lgNomEntries = lit(12) if lgNomEntries is None else lit(lgNomEntries) + _family = lit("QUICKSELECT") if family is None else lit(family) + return _invoke_function_over_columns(fn, col, _lgNomEntries, _family) theta_sketch_agg.__doc__ = pysparkfuncs.theta_sketch_agg.__doc__ diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index f8d18b8e9b5ce..fb5a5fafb46d8 100644 --- a/python/pyspark/sql/functions/builtin.py +++ 
b/python/pyspark/sql/functions/builtin.py @@ -26786,10 +26786,12 @@ def hll_union( def theta_sketch_agg( col: "ColumnOrName", lgNomEntries: Optional[Union[int, Column]] = None, + family: Optional[str] = None, ) -> Column: """ Aggregate function: returns the compact binary representation of the Datasketches - ThetaSketch with the values in the input column configured with lgNomEntries nominal entries. + ThetaSketch with the values in the input column configured with lgNomEntries nominal entries + and the specified sketch family. .. versionadded:: 4.1.0 @@ -26799,6 +26801,8 @@ def theta_sketch_agg( lgNomEntries : :class:`~pyspark.sql.Column` or int, optional The log-base-2 of nominal entries, where nominal entries is the size of the sketch (must be between 4 and 26, defaults to 12) + family : str, optional + The sketch family: 'QUICKSELECT' or 'ALPHA' (defaults to 'QUICKSELECT'). Returns ------- @@ -26819,24 +26823,30 @@ def theta_sketch_agg( >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([1,2,2,3], "INT") >>> df.agg(sf.theta_sketch_estimate(sf.theta_sketch_agg("value"))).show() - +--------------------------------------------------+ - |theta_sketch_estimate(theta_sketch_agg(value, 12))| - +--------------------------------------------------+ - | 3| - +--------------------------------------------------+ + +---------------------------------------------------------------+ + |theta_sketch_estimate(theta_sketch_agg(value, 12, QUICKSELECT))| + +---------------------------------------------------------------+ + | 3| + +---------------------------------------------------------------+ >>> df.agg(sf.theta_sketch_estimate(sf.theta_sketch_agg("value", 15))).show() - +--------------------------------------------------+ - |theta_sketch_estimate(theta_sketch_agg(value, 15))| - +--------------------------------------------------+ - | 3| - +--------------------------------------------------+ + 
+---------------------------------------------------------------+ + |theta_sketch_estimate(theta_sketch_agg(value, 15, QUICKSELECT))| + +---------------------------------------------------------------+ + | 3| + +---------------------------------------------------------------+ + + >>> df.agg(sf.theta_sketch_estimate(sf.theta_sketch_agg("value", 15, "ALPHA"))).show() + +---------------------------------------------------------+ + |theta_sketch_estimate(theta_sketch_agg(value, 15, ALPHA))| + +---------------------------------------------------------+ + | 3| + +---------------------------------------------------------+ """ fn = "theta_sketch_agg" - if lgNomEntries is None: - return _invoke_function_over_columns(fn, col) - else: - return _invoke_function_over_columns(fn, col, lit(lgNomEntries)) + _lgNomEntries = lit(12) if lgNomEntries is None else lit(lgNomEntries) + _family = lit("QUICKSELECT") if family is None else lit(family) + return _invoke_function_over_columns(fn, col, _lgNomEntries, _family) @_try_remote_functions @@ -28027,11 +28037,11 @@ def theta_sketch_estimate(col: "ColumnOrName") -> Column: >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([1,2,2,3], "INT") >>> df.agg(sf.theta_sketch_estimate(sf.theta_sketch_agg("value"))).show() - +--------------------------------------------------+ - |theta_sketch_estimate(theta_sketch_agg(value, 12))| - +--------------------------------------------------+ - | 3| - +--------------------------------------------------+ + +---------------------------------------------------------------+ + |theta_sketch_estimate(theta_sketch_agg(value, 12, QUICKSELECT))| + +---------------------------------------------------------------+ + | 3| + +---------------------------------------------------------------+ """ fn = "theta_sketch_estimate" diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala index fce3662c36674..e7499533e2eef 
100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala @@ -1275,6 +1275,17 @@ object functions { def theta_sketch_agg(e: Column, lgNomEntries: Column): Column = Column.fn("theta_sketch_agg", e, lgNomEntries) + /** + * Aggregate function: returns the compact binary representation of the Datasketches ThetaSketch + * built with the values in the input column and configured with the `lgNomEntries` nominal + * entries and `family`. + * + * @group agg_funcs + * @since 4.1.0 + */ + def theta_sketch_agg(e: Column, lgNomEntries: Column, family: Column): Column = + Column.fn("theta_sketch_agg", e, lgNomEntries, family) + /** * Aggregate function: returns the compact binary representation of the Datasketches ThetaSketch * built with the values in the input column and configured with the `lgNomEntries` nominal @@ -1319,6 +1330,26 @@ object functions { def theta_sketch_agg(columnName: String): Column = theta_sketch_agg(Column(columnName)) + /** + * Aggregate function: returns the compact binary representation of the Datasketches ThetaSketch + * built with the values in the input column, configured with `lgNomEntries` and `family`. + * + * @group agg_funcs + * @since 4.1.0 + */ + def theta_sketch_agg(e: Column, lgNomEntries: Int, family: String): Column = + Column.fn("theta_sketch_agg", e, lit(lgNomEntries), lit(family)) + + /** + * Aggregate function: returns the compact binary representation of the Datasketches ThetaSketch + * built with the values in the input column, configured with `lgNomEntries` and `family`. 
+ * + * @group agg_funcs + * @since 4.1.0 + */ + def theta_sketch_agg(columnName: String, lgNomEntries: Int, family: String): Column = + theta_sketch_agg(Column(columnName), lgNomEntries, family) + /** * Aggregate function: returns the compact binary representation of the Datasketches * ThetaSketch, generated by the union of Datasketches ThetaSketch instances in the input column diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/thetasketchesAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/thetasketchesAggregates.scala index 0f148d03cd70b..229d871820f10 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/thetasketchesAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/thetasketchesAggregates.scala @@ -17,14 +17,14 @@ package org.apache.spark.sql.catalyst.expressions.aggregate +import org.apache.datasketches.common.Family import org.apache.datasketches.memory.Memory import org.apache.datasketches.theta.{CompactSketch, Intersection, SetOperation, Sketch, Union, UpdateSketch, UpdateSketchBuilder} import org.apache.spark.SparkUnsupportedOperationException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, ExpressionDescription, Literal} -import org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate -import org.apache.spark.sql.catalyst.trees.{BinaryLike, UnaryLike} +import org.apache.spark.sql.catalyst.trees.{BinaryLike, TernaryLike, UnaryLike} import org.apache.spark.sql.catalyst.util.{ArrayData, CollationFactory, ThetaSketchUtils} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.types.StringTypeWithCollation @@ -59,10 +59,12 @@ case class FinalizedSketch(sketch: CompactSketch) extends ThetaSketchState { * * See 
[[https://datasketches.apache.org/docs/Theta/ThetaSketches.html]] for more information. * - * @param left + * @param first * child expression against which unique counting will occur - * @param right + * @param second * the log-base-2 of nomEntries decides the number of buckets for the sketch + * @param third + * the family of the sketch (QUICKSELECT or ALPHA) * @param mutableAggBufferOffset * offset for mutable aggregation buffer * @param inputAggBufferOffset @@ -71,49 +73,73 @@ case class FinalizedSketch(sketch: CompactSketch) extends ThetaSketchState { // scalastyle:off line.size.limit @ExpressionDescription( usage = """ - _FUNC_(expr, lgNomEntries) - Returns the ThetaSketch compact binary representation. + _FUNC_(expr, lgNomEntries, family) - Returns the ThetaSketch compact binary representation. `lgNomEntries` (optional) is the log-base-2 of nominal entries, with nominal entries deciding - the number buckets or slots for the ThetaSketch. """, + the number buckets or slots for the ThetaSketch. 
+ `family` (optional) is the sketch family, either 'QUICKSELECT' or 'ALPHA' (defaults to + 'QUICKSELECT').""", examples = """ Examples: + > SELECT theta_sketch_estimate(_FUNC_(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col); + 3 > SELECT theta_sketch_estimate(_FUNC_(col, 12)) FROM VALUES (1), (1), (2), (2), (3) tab(col); 3 + > SELECT theta_sketch_estimate(_FUNC_(col, 15, 'ALPHA')) FROM VALUES (1), (1), (2), (2), (3) tab(col); + 3 """, group = "agg_funcs", since = "4.1.0") // scalastyle:on line.size.limit case class ThetaSketchAgg( - left: Expression, - right: Expression, - override val mutableAggBufferOffset: Int, - override val inputAggBufferOffset: Int) + first: Expression, + second: Expression, + third: Expression, + override val mutableAggBufferOffset: Int, + override val inputAggBufferOffset: Int) extends TypedImperativeAggregate[ThetaSketchState] - with BinaryLike[Expression] + with TernaryLike[Expression] with ExpectsInputTypes { // ThetaSketch config - mark as lazy so that they're not evaluated during tree transformation. 
- lazy val lgNomEntries: Int = { - if (!right.foldable) { + + private lazy val lgNomEntries: Int = { + if (!second.foldable) { throw QueryExecutionErrors.thetaLgNomEntriesMustBeConstantError(prettyName) } - val lgNomEntriesInput = right.eval().asInstanceOf[Int] + val lgNomEntriesInput = second.eval().asInstanceOf[Int] ThetaSketchUtils.checkLgNomLongs(lgNomEntriesInput, prettyName) lgNomEntriesInput } - // Constructors + private lazy val family: Family = { + if (!third.foldable) { + throw QueryExecutionErrors.thetaFamilyMustBeConstantError(prettyName) + } + val familyName = third.eval().asInstanceOf[UTF8String] + ThetaSketchUtils.parseFamily(familyName.toString, prettyName) + } def this(child: Expression) = { - this(child, Literal(ThetaSketchUtils.DEFAULT_LG_NOM_LONGS), 0, 0) + this(child, + Literal(ThetaSketchUtils.DEFAULT_LG_NOM_LONGS), + Literal(UTF8String.fromString(ThetaSketchUtils.DEFAULT_FAMILY)), + 0, 0) } def this(child: Expression, lgNomEntries: Expression) = { - this(child, lgNomEntries, 0, 0) + this(child, + lgNomEntries, + Literal(UTF8String.fromString(ThetaSketchUtils.DEFAULT_FAMILY)), + 0, 0) + } + + def this(child: Expression, lgNomEntries: Expression, family: Expression) = { + this(child, lgNomEntries, family, 0, 0) } def this(child: Expression, lgNomEntries: Int) = { - this(child, Literal(lgNomEntries), 0, 0) + this(child, Literal(lgNomEntries)) } // Copy constructors required by ImperativeAggregate @@ -124,16 +150,11 @@ case class ThetaSketchAgg( override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ThetaSketchAgg = copy(inputAggBufferOffset = newInputAggBufferOffset) - override protected def withNewChildrenInternal( - newLeft: Expression, - newRight: Expression): ThetaSketchAgg = - copy(left = newLeft, right = newRight) - // Overrides for TypedImperativeAggregate override def prettyName: String = "theta_sketch_agg" - override def inputTypes: Seq[AbstractDataType] = + override def inputTypes: Seq[AbstractDataType] = { Seq( 
TypeCollection( ArrayType(IntegerType), @@ -144,14 +165,16 @@ case class ThetaSketchAgg( IntegerType, LongType, StringTypeWithCollation(supportsTrimCollation = true)), - IntegerType) + IntegerType, + StringType) + } override def dataType: DataType = BinaryType override def nullable: Boolean = false /** - * Instantiate an UpdateSketch instance using the lgNomEntries param. + * Instantiate an UpdateSketch instance using the lgNomEntries and family params. * * @return * an UpdateSketch instance wrapped with UpdatableSketchBuffer @@ -159,6 +182,7 @@ case class ThetaSketchAgg( override def createAggregationBuffer(): ThetaSketchState = { val builder = new UpdateSketchBuilder builder.setLogNominalEntries(lgNomEntries) + builder.setFamily(family) UpdatableSketchBuffer(builder.build) } @@ -179,7 +203,7 @@ case class ThetaSketchAgg( */ override def update(updateBuffer: ThetaSketchState, input: InternalRow): ThetaSketchState = { // Return early for null values. - val v = left.eval(input) + val v = first.eval(input) if (v == null) return updateBuffer // Initialized buffer should be UpdatableSketchBuffer, else error out. @@ -189,7 +213,7 @@ case class ThetaSketchAgg( } // Handle the different data types for sketch updates. 
- left.dataType match { + first.dataType match { case ArrayType(IntegerType, _) => val arr = v.asInstanceOf[ArrayData].toIntArray() sketch.update(arr) @@ -216,7 +240,7 @@ case class ThetaSketchAgg( case _ => throw new SparkUnsupportedOperationException( errorClass = "_LEGACY_ERROR_TEMP_3121", - messageParameters = Map("dataType" -> left.dataType.toString)) + messageParameters = Map("dataType" -> first.dataType.toString)) } updateBuffer @@ -292,6 +316,9 @@ case class ThetaSketchAgg( this.createAggregationBuffer() } } + + override protected def withNewChildrenInternal(newFirst: Expression, newSecond: Expression, + newThird: Expression): ThetaSketchAgg = copy(newFirst, newSecond, newThird) } /** @@ -334,6 +361,7 @@ case class ThetaUnionAgg( // ThetaSketch config - mark as lazy so that they're not evaluated during tree transformation. + lazy val lgNomEntries: Int = { if (!right.foldable) { throw QueryExecutionErrors.thetaLgNomEntriesMustBeConstantError(prettyName) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ThetaSketchUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ThetaSketchUtils.scala index fb7b0d275e580..fe1c80753ff6b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ThetaSketchUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ThetaSketchUtils.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.util +import java.util.Locale + import org.apache.datasketches.common.{Family, SketchesArgumentException} import org.apache.datasketches.memory.{Memory, MemoryBoundsException} import org.apache.datasketches.theta.CompactSketch @@ -46,6 +48,17 @@ object ThetaSketchUtils { final val MAX_LG_NOM_LONGS = 26 final val DEFAULT_LG_NOM_LONGS = 12 + /** ALPHA is optimized for speed and offers slightly better initial accuracy (lower error) + * for simple updates. Its estimation precision reverts to the standard level if merged with + * other sketches. 
+ * QUICKSELECT is the default and more flexible choice, providing the standard level of accuracy + * and full support for all set operations (Union, Intersection, etc.). + * */ + final val FAMILY_QUICKSELECT = "QUICKSELECT" + final val FAMILY_ALPHA = "ALPHA" + final val DEFAULT_FAMILY = FAMILY_QUICKSELECT + + /** * Validates the lgNomLongs parameter for Theta/Tuple sketch size. Throws a Spark SQL exception * if the value is out of bounds. @@ -65,6 +78,26 @@ object ThetaSketchUtils { } } + /** + * Converts a family string to DataSketches Family enum. + * Throws a Spark SQL exception if the family name is invalid. + * + * @param familyName The family name string + * @param prettyName The display name of the function/expression for error messages + * @return The corresponding DataSketches Family enum value + */ + def parseFamily(familyName: String, prettyName: String): Family = { + familyName.toUpperCase(Locale.ROOT) match { + case FAMILY_QUICKSELECT => Family.QUICKSELECT + case FAMILY_ALPHA => Family.ALPHA + case _ => + throw QueryExecutionErrors.thetaInvalidFamily( + function = prettyName, + value = familyName, + validFamilies = Seq(FAMILY_QUICKSELECT, FAMILY_ALPHA)) + } + } + /** * Wraps a byte array into a DataSketches CompactSketch object. 
This method safely deserializes * a compact Theta sketch from its binary representation, handling potential deserialization diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 3e0c268a6d50d..02a19efb70bd4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -3250,6 +3250,12 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE messageParameters = Map("function" -> toSQLId(function))) } + def thetaFamilyMustBeConstantError(function: String): Throwable = { + new SparkRuntimeException( + errorClass = "THETA_FAMILY_MUST_BE_CONSTANT", + messageParameters = Map("function" -> toSQLId(function))) + } + def kllSketchInvalidQuantileRangeError(function: String): Throwable = { new SparkRuntimeException( errorClass = "KLL_SKETCH_INVALID_QUANTILE_RANGE", @@ -3271,6 +3277,16 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE "k" -> toSQLValue(k, IntegerType))) } + def thetaInvalidFamily(function: String, value: String, validFamilies: + Seq[String]): Throwable = { + new SparkRuntimeException( + errorClass = "THETA_INVALID_FAMILY", + messageParameters = Map( + "function" -> toSQLId(function), + "value" -> toSQLValue(value, StringType), + "validFamilies" -> validFamilies.map(f => toSQLId(f)).mkString(", "))) + } + def vectorDimensionMismatchError( function: String, leftDim: Int, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ThetaSketchUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ThetaSketchUtilsSuite.scala index 17072acdcbc0f..902fbbb2dcb36 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ThetaSketchUtilsSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ThetaSketchUtilsSuite.scala @@ -35,6 +35,35 @@ class ThetaSketchUtilsSuite extends SparkFunSuite with SQLHelper { } } + test("parseFamily: accepts valid family names") { + // Test valid family names (case insensitive) + val validFamilies = Seq( + ("QUICKSELECT", ThetaSketchUtils.FAMILY_QUICKSELECT), + ("quickselect", ThetaSketchUtils.FAMILY_QUICKSELECT), + ("QuickSelect", ThetaSketchUtils.FAMILY_QUICKSELECT), + ("ALPHA", ThetaSketchUtils.FAMILY_ALPHA), + ("alpha", ThetaSketchUtils.FAMILY_ALPHA), + ("Alpha", ThetaSketchUtils.FAMILY_ALPHA) + ) + + validFamilies.foreach { case (input, expectedFamily) => + val result = ThetaSketchUtils.parseFamily(input, "test_function") + assert(result.toString == expectedFamily) + } + + val invalidFamilyName = "invalid" + checkError( + exception = intercept[SparkRuntimeException] { + ThetaSketchUtils.parseFamily(invalidFamilyName, "test_function") + }, + condition = "THETA_INVALID_FAMILY", + parameters = Map( + "function" -> "`test_function`", + "value" -> "'invalid'", + "validFamilies" -> "`QUICKSELECT`, `ALPHA`") + ) + } + test("checkLgNomLongs: throws exception for values below minimum") { val invalidValues = Seq(ThetaSketchUtils.MIN_LG_NOM_LONGS - 1, 0, -5) invalidValues.foreach { value => diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index f2c72fa18ed6d..c48c0c6781c96 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -358,10 +358,10 @@ | org.apache.spark.sql.catalyst.expressions.Subtract | - | SELECT 2 - 1 | struct<(2 - 1):int> | | org.apache.spark.sql.catalyst.expressions.Tan | tan | SELECT tan(0) | struct | | org.apache.spark.sql.catalyst.expressions.Tanh | tanh | SELECT tanh(0) | struct | -| org.apache.spark.sql.catalyst.expressions.ThetaDifference | 
theta_difference | SELECT theta_sketch_estimate(theta_difference(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (5, 4), (1, 4), (2, 5), (2, 5), (3, 1) tab(col1, col2) | struct | -| org.apache.spark.sql.catalyst.expressions.ThetaIntersection | theta_intersection | SELECT theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (5, 4), (1, 4), (2, 5), (2, 5), (3, 1) tab(col1, col2) | struct | -| org.apache.spark.sql.catalyst.expressions.ThetaSketchEstimate | theta_sketch_estimate | SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct | -| org.apache.spark.sql.catalyst.expressions.ThetaUnion | theta_union | SELECT theta_sketch_estimate(theta_union(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6) tab(col1, col2) | struct | +| org.apache.spark.sql.catalyst.expressions.ThetaDifference | theta_difference | SELECT theta_sketch_estimate(theta_difference(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (5, 4), (1, 4), (2, 5), (2, 5), (3, 1) tab(col1, col2) | struct | +| org.apache.spark.sql.catalyst.expressions.ThetaIntersection | theta_intersection | SELECT theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (5, 4), (1, 4), (2, 5), (2, 5), (3, 1) tab(col1, col2) | struct | +| org.apache.spark.sql.catalyst.expressions.ThetaSketchEstimate | theta_sketch_estimate | SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.ThetaUnion | theta_union | SELECT theta_sketch_estimate(theta_union(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6) tab(col1, col2) | struct | | org.apache.spark.sql.catalyst.expressions.TimeDiff | time_diff | SELECT time_diff('HOUR', TIME'20:30:29', TIME'21:30:28') | struct | | 
org.apache.spark.sql.catalyst.expressions.TimeFromMicros | time_from_micros | SELECT time_from_micros(0) | struct | | org.apache.spark.sql.catalyst.expressions.TimeFromMillis | time_from_millis | SELECT time_from_millis(0) | struct | @@ -406,12 +406,12 @@ | org.apache.spark.sql.catalyst.expressions.TryValidateUTF8 | try_validate_utf8 | SELECT try_validate_utf8('Spark') | struct | | org.apache.spark.sql.catalyst.expressions.TupleDifferenceDouble | tuple_difference_double | SELECT tuple_sketch_estimate_double(tuple_difference_double(tuple_sketch_agg_double(col1, val1), tuple_sketch_agg_double(col2, val2))) FROM VALUES (5, 5.0D, 4, 4.0D), (1, 1.0D, 4, 4.0D), (2, 2.0D, 5, 5.0D), (3, 3.0D, 1, 1.0D) tab(col1, val1, col2, val2) | struct | | org.apache.spark.sql.catalyst.expressions.TupleDifferenceInteger | tuple_difference_integer | SELECT tuple_sketch_estimate_integer(tuple_difference_integer(tuple_sketch_agg_integer(col1, val1), tuple_sketch_agg_integer(col2, val2))) FROM VALUES (5, 5, 4, 4), (1, 1, 4, 4), (2, 2, 5, 5), (3, 3, 1, 1) tab(col1, val1, col2, val2) | struct | -| org.apache.spark.sql.catalyst.expressions.TupleDifferenceThetaDouble | tuple_difference_theta_double | SELECT tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(col1, val1), theta_sketch_agg(col2))) FROM VALUES (5, 5.0D, 4), (1, 1.0D, 4), (2, 2.0D, 5), (3, 3.0D, 1) tab(col1, val1, col2) | struct | -| org.apache.spark.sql.catalyst.expressions.TupleDifferenceThetaInteger | tuple_difference_theta_integer | SELECT tuple_sketch_estimate_integer(tuple_difference_theta_integer(tuple_sketch_agg_integer(col1, val1), theta_sketch_agg(col2))) FROM VALUES (5, 5, 4), (1, 1, 4), (2, 2, 5), (3, 3, 1) tab(col1, val1, col2) | struct | +| org.apache.spark.sql.catalyst.expressions.TupleDifferenceThetaDouble | tuple_difference_theta_double | SELECT tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(col1, val1), theta_sketch_agg(col2))) FROM VALUES (5, 5.0D, 
4), (1, 1.0D, 4), (2, 2.0D, 5), (3, 3.0D, 1) tab(col1, val1, col2) | struct | +| org.apache.spark.sql.catalyst.expressions.TupleDifferenceThetaInteger | tuple_difference_theta_integer | SELECT tuple_sketch_estimate_integer(tuple_difference_theta_integer(tuple_sketch_agg_integer(col1, val1), theta_sketch_agg(col2))) FROM VALUES (5, 5, 4), (1, 1, 4), (2, 2, 5), (3, 3, 1) tab(col1, val1, col2) | struct | | org.apache.spark.sql.catalyst.expressions.TupleIntersectionDouble | tuple_intersection_double | SELECT tuple_sketch_estimate_double(tuple_intersection_double(tuple_sketch_agg_double(col1, val1), tuple_sketch_agg_double(col2, val2))) FROM VALUES (1, 1.0D, 1, 4.0D), (2, 2.0D, 2, 5.0D), (3, 3.0D, 4, 6.0D) tab(col1, val1, col2, val2) | struct | | org.apache.spark.sql.catalyst.expressions.TupleIntersectionInteger | tuple_intersection_integer | SELECT tuple_sketch_estimate_integer(tuple_intersection_integer(tuple_sketch_agg_integer(col1, val1), tuple_sketch_agg_integer(col2, val2))) FROM VALUES (1, 1, 1, 4), (2, 2, 2, 5), (3, 3, 4, 6) tab(col1, val1, col2, val2) | struct | -| org.apache.spark.sql.catalyst.expressions.TupleIntersectionThetaDouble | tuple_intersection_theta_double | SELECT tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(col1, val1), theta_sketch_agg(col2))) FROM VALUES (1, 1.0D, 1), (2, 2.0D, 2), (3, 3.0D, 4) tab(col1, val1, col2) | struct | -| org.apache.spark.sql.catalyst.expressions.TupleIntersectionThetaInteger | tuple_intersection_theta_integer | SELECT tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(col1, val1), theta_sketch_agg(col2))) FROM VALUES (1, 1, 1), (2, 2, 2), (3, 3, 4) tab(col1, val1, col2) | struct | +| org.apache.spark.sql.catalyst.expressions.TupleIntersectionThetaDouble | tuple_intersection_theta_double | SELECT tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(col1, val1), theta_sketch_agg(col2))) FROM VALUES (1, 1.0D, 1), 
(2, 2.0D, 2), (3, 3.0D, 4) tab(col1, val1, col2) | struct | +| org.apache.spark.sql.catalyst.expressions.TupleIntersectionThetaInteger | tuple_intersection_theta_integer | SELECT tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(col1, val1), theta_sketch_agg(col2))) FROM VALUES (1, 1, 1), (2, 2, 2), (3, 3, 4) tab(col1, val1, col2) | struct | | org.apache.spark.sql.catalyst.expressions.TupleSketchEstimateDouble | tuple_sketch_estimate_double | SELECT tuple_sketch_estimate_double(tuple_sketch_agg_double(key, summary)) FROM VALUES (1, 1.0D), (1, 2.0D), (2, 3.0D) tab(key, summary) | struct | | org.apache.spark.sql.catalyst.expressions.TupleSketchEstimateInteger | tuple_sketch_estimate_integer | SELECT tuple_sketch_estimate_integer(tuple_sketch_agg_integer(key, summary)) FROM VALUES (1, 1), (1, 2), (2, 3) tab(key, summary) | struct | | org.apache.spark.sql.catalyst.expressions.TupleSketchSummaryDouble | tuple_sketch_summary_double | SELECT tuple_sketch_summary_double(tuple_sketch_agg_double(key, summary)) FROM VALUES (1, 1.0D), (1, 2.0D), (2, 3.0D) tab(key, summary) | struct | @@ -420,8 +420,8 @@ | org.apache.spark.sql.catalyst.expressions.TupleSketchThetaInteger | tuple_sketch_theta_integer | SELECT tuple_sketch_theta_integer(tuple_sketch_agg_integer(key, summary)) FROM VALUES (1, 1), (2, 2), (3, 3) tab(key, summary) | struct | | org.apache.spark.sql.catalyst.expressions.TupleUnionDoubleExpressionBuilder | tuple_union_double | SELECT tuple_sketch_estimate_double(tuple_union_double(tuple_sketch_agg_double(col1, val1), tuple_sketch_agg_double(col2, val2))) FROM VALUES (1, 1.0D, 4, 4.0D), (2, 2.0D, 5, 5.0D), (3, 3.0D, 6, 6.0D) tab(col1, val1, col2, val2) | struct | | org.apache.spark.sql.catalyst.expressions.TupleUnionIntegerExpressionBuilder | tuple_union_integer | SELECT tuple_sketch_estimate_integer(tuple_union_integer(tuple_sketch_agg_integer(col1, val1), tuple_sketch_agg_integer(col2, val2))) FROM VALUES (1, 1, 4, 4), (2, 2, 5, 
5), (3, 3, 6, 6) tab(col1, val1, col2, val2) | struct | -| org.apache.spark.sql.catalyst.expressions.TupleUnionThetaDoubleExpressionBuilder | tuple_union_theta_double | SELECT tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(col1, val1), theta_sketch_agg(col2))) FROM VALUES (1, 1.0D, 4), (2, 2.0D, 5), (3, 3.0D, 6) tab(col1, val1, col2) | struct | -| org.apache.spark.sql.catalyst.expressions.TupleUnionThetaIntegerExpressionBuilder | tuple_union_theta_integer | SELECT tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(col1, val1), theta_sketch_agg(col2))) FROM VALUES (1, 1, 4), (2, 2, 5), (3, 3, 6) tab(col1, val1, col2) | struct | +| org.apache.spark.sql.catalyst.expressions.TupleUnionThetaDoubleExpressionBuilder | tuple_union_theta_double | SELECT tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(col1, val1), theta_sketch_agg(col2))) FROM VALUES (1, 1.0D, 4), (2, 2.0D, 5), (3, 3.0D, 6) tab(col1, val1, col2) | struct | +| org.apache.spark.sql.catalyst.expressions.TupleUnionThetaIntegerExpressionBuilder | tuple_union_theta_integer | SELECT tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(col1, val1), theta_sketch_agg(col2))) FROM VALUES (1, 1, 4), (2, 2, 5), (3, 3, 6) tab(col1, val1, col2) | struct | | org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct | | org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct | | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct | @@ -522,7 +522,7 @@ | org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | stddev_samp | SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.Sum | sum | SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) | struct | | 
org.apache.spark.sql.catalyst.expressions.aggregate.ThetaIntersectionAgg | theta_intersection_agg | SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) FROM (SELECT theta_sketch_agg(col) as sketch FROM VALUES (1) tab(col) UNION ALL SELECT theta_sketch_agg(col, 20) as sketch FROM VALUES (1) tab(col)) | struct | -| org.apache.spark.sql.catalyst.expressions.aggregate.ThetaSketchAgg | theta_sketch_agg | SELECT theta_sketch_estimate(theta_sketch_agg(col, 12)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.ThetaSketchAgg | theta_sketch_agg | SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.ThetaUnionAgg | theta_union_agg | SELECT theta_sketch_estimate(theta_union_agg(sketch)) FROM (SELECT theta_sketch_agg(col) as sketch FROM VALUES (1) tab(col) UNION ALL SELECT theta_sketch_agg(col, 20) as sketch FROM VALUES (1) tab(col)) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.TryAverageExpressionBuilder | try_avg | SELECT try_avg(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.TrySumExpressionBuilder | try_sum | SELECT try_sum(col) FROM VALUES (5), (10), (15) AS tab(col) | struct | diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/thetasketch.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/thetasketch.sql.out index 84fb8086151d1..b10a199500c06 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/thetasketch.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/thetasketch.sql.out @@ -167,7 +167,7 @@ CreateDataSourceTableAsSelectCommand `spark_catalog`.`default`.`t_string_collati -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) AS result FROM t_int_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 
12, 0, 0)) AS result#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0)) AS result#xL] +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet @@ -175,7 +175,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS result#x -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_array_int_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet @@ -183,7 +183,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS theta_sk -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col2)) FROM t_array_long_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col2#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col2, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col2, 12, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet @@ -191,7 +191,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col2#x, 12, 0, 0)) AS theta_sk -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_binary_a_b_through_e_f -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0)) AS 
theta_sketch_estimate(theta_sketch_agg(col1, 12, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet @@ -199,7 +199,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS theta_sk -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -207,7 +207,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS theta_sk -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col2)) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col2#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col2, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col2, 12, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -215,7 +215,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col2#x, 12, 0, 0)) AS theta_sk -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1, 22)) FROM t_int_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 22, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 22))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 22, QUICKSELECT, 0, 0)) AS 
theta_sketch_estimate(theta_sketch_agg(col1, 22, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet @@ -223,7 +223,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 22, 0, 0)) AS theta_sk -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_long_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#xL, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#xL, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet @@ -231,7 +231,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#xL, 12, 0, 0)) AS theta_s -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_string_a_d_through_e_h -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet @@ -242,7 +242,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_int_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), 12))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 
0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), 12))#xL] +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet @@ -253,7 +253,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 15), theta_sketch_agg(col2))) FROM t_long_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#xL, 15, 0, 0), theta_sketch_agg(col2#xL, 12, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 15), theta_sketch_agg(col2, 12), 12))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#xL, 15, QUICKSELECT, 0, 0), theta_sketch_agg(col2#xL, 12, QUICKSELECT, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 15, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), 12))#xL] +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet @@ -264,7 +264,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), 12))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), 12))#xL] +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -275,7 +275,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 6), 
theta_sketch_agg(col2, 15), 15)) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 6, 0, 0), theta_sketch_agg(col2#x, 15, 0, 0), 15)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 6), theta_sketch_agg(col2, 15), 15))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 6, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 15, QUICKSELECT, 0, 0), 15)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 6, QUICKSELECT), theta_sketch_agg(col2, 15, QUICKSELECT), 15))#xL] +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -286,7 +286,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_string_a_d_through_e_h -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), 12))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), 12))#xL] +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet @@ -297,7 +297,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2), 20)) FROM t_binary_a_b_through_e_f -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0), 20)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), 20))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, 
QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0), 20)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), 20))#xL] +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet @@ -308,7 +308,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_array_int_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), 12))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), 12))#xL] +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet @@ -319,7 +319,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 13))) FROM t_array_long_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 13, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 13), 12))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 13, QUICKSELECT, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 13, QUICKSELECT), 12))#xL] +- SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet @@ -330,7 +330,7 @@ SELECT 
theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_int_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet @@ -341,7 +341,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 5), theta_sketch_agg(col2, 12))) FROM t_long_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#xL, 5, 0, 0), theta_sketch_agg(col2#xL, 12, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 5), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#xL, 5, QUICKSELECT, 0, 0), theta_sketch_agg(col2#xL, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 5, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet @@ -352,7 +352,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate 
[theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -363,7 +363,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 5), theta_sketch_agg(col2))) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 5, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 5), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 5, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 5, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -374,7 +374,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_string_a_d_through_e_h -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h +- Relation 
spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet @@ -385,7 +385,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 22))) FROM t_binary_a_b_through_e_f -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 22, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 22)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 22, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 22, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet @@ -396,7 +396,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_array_int_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet @@ -407,7 +407,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 10))) FROM t_array_long_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 10, 0, 0))) AS 
theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 10)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 10, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 10, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet @@ -418,7 +418,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_int_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet @@ -429,7 +429,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 5))) FROM t_long_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#xL, 12, 0, 0), theta_sketch_agg(col2#xL, 5, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 5)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#xL, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#xL, 5, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 5, QUICKSELECT)))#xL] +- 
SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet @@ -440,7 +440,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -451,7 +451,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 12), theta_sketch_agg(col2))) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -462,7 +462,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_string_a_d_through_e_h -- !query analysis -Aggregate 
[theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet @@ -473,7 +473,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 6), theta_sketch_agg(col2, 8))) FROM t_binary_a_b_through_e_f -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 6, 0, 0), theta_sketch_agg(col2#x, 8, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 6), theta_sketch_agg(col2, 8)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 6, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 8, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 6, QUICKSELECT), theta_sketch_agg(col2, 8, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet @@ -484,7 +484,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_array_int_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS 
theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet @@ -495,7 +495,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 4))) FROM t_array_long_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 4, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 4)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 4, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 4, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet @@ -509,10 +509,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch, 15)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 15, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 15))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 : +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 20, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 20, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet @@ -526,10 +526,10 @@ SELECT 
theta_sketch_estimate(theta_union_agg(sketch, 12)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 12, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 12))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 : +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -543,10 +543,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch, 14)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 14, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 14))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h : +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet @@ -560,10 +560,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch, 10)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 10, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 10))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- 
Aggregate [theta_sketch_agg(col1#xL, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#xL, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 : +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet - +- Aggregate [theta_sketch_agg(col2#xL, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#xL, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet @@ -577,10 +577,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch, 6)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 6, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 6))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 : +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -594,10 +594,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 12, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 12))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f : +- Relation 
spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet @@ -611,10 +611,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch, 12)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 12, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 12))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 : +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet @@ -628,10 +628,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch, 16)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 16, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 16))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 : +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias 
spark_catalog.default.t_array_long_1_3_through_4_6 +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet @@ -645,10 +645,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 : +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet @@ -662,10 +662,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#xL, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#xL, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 : +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet - +- Aggregate [theta_sketch_agg(col2#xL, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#xL, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet @@ -679,10 +679,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate 
[theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 : +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -696,10 +696,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 : +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -713,10 +713,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate 
[theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h : +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet @@ -730,10 +730,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f : +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet @@ -747,10 +747,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 : +- Relation 
spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet @@ -764,10 +764,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 : +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet @@ -776,7 +776,7 @@ Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS thet SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (1), (null), (2), (null), (3) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -785,7 +785,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES ('test'), (null), ('null'), (null) 
tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -794,7 +794,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (100L), (null), (200L), (null), (300L) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#xL, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#xL, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#xL] @@ -803,7 +803,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#xL, 12, 0, 0)) AS theta_sk SELECT theta_sketch_estimate(theta_sketch_agg(CAST(col AS DOUBLE))) FROM VALUES (1.1), (null), (2.2), (null), (3.3) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(cast(col#x as double), 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(CAST(col AS DOUBLE), 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(cast(col#x as double), 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(CAST(col AS DOUBLE), 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -812,7 +812,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(cast(col#x as double), 12, 0, SELECT theta_sketch_estimate(theta_sketch_agg(CAST(col AS FLOAT))) FROM VALUES (1.5), (null), (2.5), (null), (3.5) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(cast(col#x as float), 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(CAST(col AS FLOAT), 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(cast(col#x 
as float), 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(CAST(col AS FLOAT), 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -821,7 +821,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(cast(col#x as float), 12, 0, 0 SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (X'AA'), (null), (X'BB'), (null), (X'CC') tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -830,7 +830,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(1, 2)), (null), (ARRAY(3, 4)), (null), (ARRAY(5, 6)) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -839,7 +839,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(10L, 20L)), (null), (ARRAY(30L, 40L)), (null), (ARRAY(50L, 60L)) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -848,7 +848,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT 
theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(1, null)), (ARRAY(1)), (ARRAY(2, null, 3)), (ARRAY(4)) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -857,7 +857,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(10L, null)), (ARRAY(10L)), (ARRAY(20L, null, 30L)), (ARRAY(40L)) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -866,7 +866,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY()), (ARRAY(1, 2)), (ARRAY()), (ARRAY(3, 4)) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -875,7 +875,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (''), ('a'), (''), ('b'), ('c') tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate 
[theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -884,7 +884,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (X''), (X'01'), (X'02'), (X'03'), (CAST(' ' AS BINARY)), (X'e280'), (X'c1'), (X'c120') tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -892,7 +892,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) utf8_b FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS utf8_b#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0)) AS utf8_b#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -900,7 +900,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS utf8_b#x -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1 COLLATE UTF8_LCASE)) utf8_lc FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_LCASE), 12, 0, 0)) AS utf8_lc#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_LCASE), 12, QUICKSELECT, 0, 0)) AS utf8_lc#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -908,7 +908,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_LCASE), 1 -- 
!query SELECT theta_sketch_estimate(theta_sketch_agg(col1 COLLATE UNICODE)) unicode FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE), 12, 0, 0)) AS unicode#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE), 12, QUICKSELECT, 0, 0)) AS unicode#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -916,7 +916,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE), 12, -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1 COLLATE UNICODE_CI)) unicode_ci FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_CI), 12, 0, 0)) AS unicode_ci#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_CI), 12, QUICKSELECT, 0, 0)) AS unicode_ci#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -924,7 +924,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_CI), 1 -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1 COLLATE UTF8_BINARY_RTRIM)) utf8_b_rt FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_BINARY_RTRIM), 12, 0, 0)) AS utf8_b_rt#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_BINARY_RTRIM), 12, QUICKSELECT, 0, 0)) AS utf8_b_rt#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -932,7 +932,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_BINARY_RT -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1 COLLATE UTF8_LCASE_RTRIM)) utf8_lc_rt FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, 
UTF8_LCASE_RTRIM), 12, 0, 0)) AS utf8_lc_rt#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_LCASE_RTRIM), 12, QUICKSELECT, 0, 0)) AS utf8_lc_rt#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -940,7 +940,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_LCASE_RTR -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1 COLLATE UNICODE_RTRIM)) unicode_rt FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_RTRIM), 12, 0, 0)) AS unicode_rt#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_RTRIM), 12, QUICKSELECT, 0, 0)) AS unicode_rt#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -948,7 +948,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_RTRIM) -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1 COLLATE UNICODE_CI_RTRIM)) unicode_ci_rt FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_CI_RTRIM), 12, 0, 0)) AS unicode_ci_rt#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_CI_RTRIM), 12, QUICKSELECT, 0, 0)) AS unicode_ci_rt#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -982,16 +982,16 @@ WithCTE : +- Union false, false : :- Union false, false : : :- Union false, false -: : : :- Aggregate [int_sketch AS sketch_type#x, theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] +: : : :- Aggregate [int_sketch AS sketch_type#x, theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : : : : +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 : : : : +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet 
-: : : +- Aggregate [long_sketch AS sketch_type#x, theta_sketch_agg(col1#xL, 15, 0, 0) AS sketch#x] +: : : +- Aggregate [long_sketch AS sketch_type#x, theta_sketch_agg(col1#xL, 15, QUICKSELECT, 0, 0) AS sketch#x] : : : +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 : : : +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet -: : +- Aggregate [double_sketch AS sketch_type#x, theta_sketch_agg(col1#x, 10, 0, 0) AS sketch#x] +: : +- Aggregate [double_sketch AS sketch_type#x, theta_sketch_agg(col1#x, 10, QUICKSELECT, 0, 0) AS sketch#x] : : +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 : : +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet -: +- Aggregate [string_sketch AS sketch_type#x, theta_sketch_agg(col1#x, 14, 0, 0) AS sketch#x] +: +- Aggregate [string_sketch AS sketch_type#x, theta_sketch_agg(col1#x, 14, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h : +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet :- CTERelationDef xxxx, false @@ -1001,7 +1001,7 @@ WithCTE : +- CTERelationRef xxxx, true, [sketch_type#x, sketch#x], false, false, 4 :- CTERelationDef xxxx, false : +- SubqueryAlias individual_sketches -: +- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch1#x, theta_sketch_agg(col2#x, 12, 0, 0) AS sketch2#x] +: +- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch1#x, theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch2#x] : +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 : +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet +- Project [theta_sketch_estimate(scalar-subquery#x []) AS union_estimate#xL, theta_sketch_estimate(theta_union(sketch1#x, sketch2#x, 15)) AS binary_union_estimate#xL, theta_sketch_estimate(theta_intersection(sketch1#x, sketch2#x)) AS intersection_estimate#xL, 
theta_sketch_estimate(theta_difference(sketch1#x, sketch2#x)) AS difference_estimate#xL] @@ -1016,7 +1016,7 @@ WithCTE SELECT theta_sketch_agg(col, 2) FROM VALUES (50), (60), (60) tab(col) -- !query analysis -Aggregate [theta_sketch_agg(col#x, 2, 0, 0) AS theta_sketch_agg(col, 2)#x] +Aggregate [theta_sketch_agg(col#x, 2, QUICKSELECT, 0, 0) AS theta_sketch_agg(col, 2, QUICKSELECT)#x] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -1025,7 +1025,7 @@ Aggregate [theta_sketch_agg(col#x, 2, 0, 0) AS theta_sketch_agg(col, 2)#x] SELECT theta_sketch_agg(col, 40) FROM VALUES (50), (60), (60) tab(col) -- !query analysis -Aggregate [theta_sketch_agg(col#x, 40, 0, 0) AS theta_sketch_agg(col, 40)#x] +Aggregate [theta_sketch_agg(col#x, 40, QUICKSELECT, 0, 0) AS theta_sketch_agg(col, 40, QUICKSELECT)#x] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -1041,10 +1041,10 @@ FROM (SELECT theta_sketch_agg(col, 12) as sketch Aggregate [theta_union_agg(sketch#x, 3, 0, 0) AS theta_union_agg(sketch, 3)#x] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias tab : +- LocalRelation [col#x] - +- Aggregate [theta_sketch_agg(col#x, 20, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col#x, 20, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -1060,10 +1060,10 @@ FROM (SELECT theta_sketch_agg(col, 12) as sketch Aggregate [theta_union_agg(sketch#x, 27, 0, 0) AS theta_union_agg(sketch, 27)#x] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias tab : +- LocalRelation [col#x] - +- Aggregate [theta_sketch_agg(col#x, 20, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col#x, 20, QUICKSELECT, 0, 0) AS sketch#x] +- 
SubqueryAlias tab +- LocalRelation [col#x] @@ -1072,7 +1072,7 @@ Aggregate [theta_union_agg(sketch#x, 27, 0, 0) AS theta_union_agg(sketch, 27)#x] SELECT theta_sketch_agg(col, CAST(NULL AS INT)) AS lg_nom_entries_is_null FROM VALUES (15), (16), (17) tab(col) -- !query analysis -Aggregate [theta_sketch_agg(col#x, cast(null as int), 0, 0) AS lg_nom_entries_is_null#x] +Aggregate [theta_sketch_agg(col#x, cast(null as int), QUICKSELECT, 0, 0) AS lg_nom_entries_is_null#x] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -1081,7 +1081,16 @@ Aggregate [theta_sketch_agg(col#x, cast(null as int), 0, 0) AS lg_nom_entries_is SELECT theta_sketch_agg(col, CAST(col AS INT)) AS lg_nom_entries_non_constant FROM VALUES (15), (16), (17) tab(col) -- !query analysis -Aggregate [theta_sketch_agg(col#x, cast(col#x as int), 0, 0) AS lg_nom_entries_non_constant#x] +Aggregate [theta_sketch_agg(col#x, cast(col#x as int), QUICKSELECT, 0, 0) AS lg_nom_entries_non_constant#x] ++- SubqueryAlias tab + +- LocalRelation [col#x] + + +-- !query +SELECT theta_sketch_agg(col, 12, CAST(col AS STRING)) AS family_non_constant +FROM VALUES ('QUICKSELECT'), ('ALPHA'), ('QUICKSELECT') tab(col) +-- !query analysis +Aggregate [theta_sketch_agg(col#x, 12, cast(col#x as string), 0, 0) AS family_non_constant#x] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -1099,7 +1108,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputType" : "\"STRING\"", "paramIndex" : "second", "requiredType" : "\"INT\"", - "sqlExpr" : "\"theta_sketch_agg(col, 15)\"" + "sqlExpr" : "\"theta_sketch_agg(col, 15, QUICKSELECT)\"" }, "queryContext" : [ { "objectType" : "", @@ -1213,7 +1222,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputType" : "\"STRING\"", "paramIndex" : "third", "requiredType" : "\"INT\"", - "sqlExpr" : "\"theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), invalid)\"" + "sqlExpr" : "\"theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, 
QUICKSELECT), invalid)\"" }, "queryContext" : [ { "objectType" : "", @@ -1243,7 +1252,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputType" : "\"STRING\"", "paramIndex" : "second", "requiredType" : "\"BINARY\"", - "sqlExpr" : "\"theta_intersection(theta_sketch_agg(col1, 12), invalid_sketch)\"" + "sqlExpr" : "\"theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), invalid_sketch)\"" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/tuplesketch.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/tuplesketch.sql.out index b241d2e1f04bf..4ae90604957f8 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/tuplesketch.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/tuplesketch.sql.out @@ -749,7 +749,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 12, sum)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12), 12, sum))#x] +Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 12, sum)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), 12, sum))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -761,7 +761,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 15)) FROM t_long_double_1_5_through_7_11 -- !query analysis -Aggregate 
[tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#xL, val1#x, 15, sum, 0, 0), theta_sketch_agg(key2#xL, 12, 0, 0), 15, sum)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 15, sum), theta_sketch_agg(key2, 12), 15, sum))#x] +Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#xL, val1#x, 15, sum, 0, 0), theta_sketch_agg(key2#xL, 12, QUICKSELECT, 0, 0), 15, sum)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 15, sum), theta_sketch_agg(key2, 12, QUICKSELECT), 15, sum))#x] +- SubqueryAlias spark_catalog.default.t_long_double_1_5_through_7_11 +- Relation spark_catalog.default.t_long_double_1_5_through_7_11[key1#xL,val1#x,key2#xL,val2#x] parquet @@ -773,7 +773,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_double_double_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 12, sum)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12), 12, sum))#x] +Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 12, sum)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), 12, sum))#x] +- SubqueryAlias spark_catalog.default.t_double_double_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_double_double_1_1_1_4_through_1_5_1_8[key1#x,val1#x,key2#x,val2#x] parquet @@ -785,7 +785,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 14)) FROM t_string_double_a_d_through_e_h -- !query analysis -Aggregate 
[tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 14, sum)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12), 14, sum))#x] +Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 14, sum)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), 14, sum))#x] +- SubqueryAlias spark_catalog.default.t_string_double_a_d_through_e_h +- Relation spark_catalog.default.t_string_double_a_d_through_e_h[key1#x,val1#x,key2#x,val2#x] parquet @@ -797,7 +797,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 12, 'sum')) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 12, sum)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12), 12, sum))#x] +Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 12, sum)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), 12, sum))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -809,7 +809,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2))) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate 
[tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 12, sum)) AS tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, sum), theta_sketch_agg(key2, 12), 12, sum))#x] +Aggregate [tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 12, sum)) AS tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), 12, sum))#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -821,7 +821,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 12, 'sum')) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 12, sum)) AS tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, sum), theta_sketch_agg(key2, 12), 12, sum))#x] +Aggregate [tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 12, sum)) AS tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), 12, sum))#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -833,7 +833,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate 
[tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), sum)) AS tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12), sum))#x] +Aggregate [tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), sum)) AS tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), sum))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -845,7 +845,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_long_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1#xL, val1#x, 5, sum, 0, 0), theta_sketch_agg(key2#xL, 12, 0, 0), sum)) AS tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1, val1, 5, sum), theta_sketch_agg(key2, 12), sum))#x] +Aggregate [tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1#xL, val1#x, 5, sum, 0, 0), theta_sketch_agg(key2#xL, 12, QUICKSELECT, 0, 0), sum)) AS tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1, val1, 5, sum), theta_sketch_agg(key2, 12, QUICKSELECT), sum))#x] +- SubqueryAlias spark_catalog.default.t_long_double_1_5_through_7_11 +- Relation spark_catalog.default.t_long_double_1_5_through_7_11[key1#xL,val1#x,key2#xL,val2#x] parquet @@ -857,7 +857,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_double_double_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate 
[tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), sum)) AS tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12), sum))#x] +Aggregate [tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), sum)) AS tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), sum))#x] +- SubqueryAlias spark_catalog.default.t_double_double_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_double_double_1_1_1_4_through_1_5_1_8[key1#x,val1#x,key2#x,val2#x] parquet @@ -869,7 +869,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_string_double_a_d_through_e_h -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), sum)) AS tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12), sum))#x] +Aggregate [tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), sum)) AS tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), sum))#x] +- SubqueryAlias spark_catalog.default.t_string_double_a_d_through_e_h +- Relation spark_catalog.default.t_string_double_a_d_through_e_h[key1#x,val1#x,key2#x,val2#x] parquet @@ -881,7 +881,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 'min')) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate 
[tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, min, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), min)) AS tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1, val1, 12, min), theta_sketch_agg(key2, 12), min))#x] +Aggregate [tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, min, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), min)) AS tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1, val1, 12, min), theta_sketch_agg(key2, 12, QUICKSELECT), min))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -893,7 +893,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2))) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), sum)) AS tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, sum), theta_sketch_agg(key2, 12), sum))#x] +Aggregate [tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), sum)) AS tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), sum))#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -905,7 +905,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 'sum')) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate 
[tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), sum)) AS tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, sum), theta_sketch_agg(key2, 12), sum))#x] +Aggregate [tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), sum)) AS tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), sum))#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -917,7 +917,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0))) AS tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12)))#x] +Aggregate [tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0))) AS tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT)))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -929,7 +929,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_long_double_1_5_through_7_11 -- !query analysis -Aggregate 
[tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1#xL, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#xL, 12, 0, 0))) AS tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12)))#x] +Aggregate [tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1#xL, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#xL, 12, QUICKSELECT, 0, 0))) AS tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT)))#x] +- SubqueryAlias spark_catalog.default.t_long_double_1_5_through_7_11 +- Relation spark_catalog.default.t_long_double_1_5_through_7_11[key1#xL,val1#x,key2#xL,val2#x] parquet @@ -941,7 +941,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_double_double_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0))) AS tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12)))#x] +Aggregate [tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0))) AS tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT)))#x] +- SubqueryAlias spark_catalog.default.t_double_double_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_double_double_1_1_1_4_through_1_5_1_8[key1#x,val1#x,key2#x,val2#x] parquet @@ -953,7 +953,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_string_double_a_d_through_e_h -- !query analysis -Aggregate 
[tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0))) AS tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12)))#x] +Aggregate [tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0))) AS tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT)))#x] +- SubqueryAlias spark_catalog.default.t_string_double_a_d_through_e_h +- Relation spark_catalog.default.t_string_double_a_d_through_e_h[key1#x,val1#x,key2#x,val2#x] parquet @@ -965,7 +965,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2))) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_integer(tuple_difference_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0))) AS tuple_sketch_estimate_integer(tuple_difference_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, sum), theta_sketch_agg(key2, 12)))#x] +Aggregate [tuple_sketch_estimate_integer(tuple_difference_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0))) AS tuple_sketch_estimate_integer(tuple_difference_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT)))#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -977,7 +977,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 12, 'max')) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, 
max, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 12, max)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, max), theta_sketch_agg(key2, 12), 12, max))#x] +Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, max, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 12, max)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, max), theta_sketch_agg(key2, 12, QUICKSELECT), 12, max))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -989,7 +989,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 12, 'min')) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, min, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 12, min)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, min), theta_sketch_agg(key2, 12), 12, min))#x] +Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, min, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 12, min)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, min), theta_sketch_agg(key2, 12, QUICKSELECT), 12, min))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -1001,7 +1001,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 12, 'alwaysone')) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, alwaysone, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 12, 
alwaysone)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, alwaysone), theta_sketch_agg(key2, 12), 12, alwaysone))#x] +Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, alwaysone, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 12, alwaysone)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, alwaysone), theta_sketch_agg(key2, 12, QUICKSELECT), 12, alwaysone))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -1013,7 +1013,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 12, 'max')) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, max, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 12, max)) AS tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, max), theta_sketch_agg(key2, 12), 12, max))#x] +Aggregate [tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, max, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 12, max)) AS tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, max), theta_sketch_agg(key2, 12, QUICKSELECT), 12, max))#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -1025,7 +1025,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 12, 'min')) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, min, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 12, min)) AS 
tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, min), theta_sketch_agg(key2, 12), 12, min))#x] +Aggregate [tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, min, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 12, min)) AS tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, min), theta_sketch_agg(key2, 12, QUICKSELECT), 12, min))#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -1037,7 +1037,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 12, 'alwaysone')) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, alwaysone, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 12, alwaysone)) AS tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, alwaysone), theta_sketch_agg(key2, 12), 12, alwaysone))#x] +Aggregate [tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, alwaysone, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 12, alwaysone)) AS tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, alwaysone), theta_sketch_agg(key2, 12, QUICKSELECT), 12, alwaysone))#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -1049,7 +1049,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 'max')) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, max, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), max)) AS 
tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1, val1, 12, max), theta_sketch_agg(key2, 12), max))#x] +Aggregate [tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, max, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), max)) AS tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1, val1, 12, max), theta_sketch_agg(key2, 12, QUICKSELECT), max))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -1061,7 +1061,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 'alwaysone')) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, alwaysone, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), alwaysone)) AS tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1, val1, 12, alwaysone), theta_sketch_agg(key2, 12), alwaysone))#x] +Aggregate [tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, alwaysone, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), alwaysone)) AS tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch_agg_double(key1, val1, 12, alwaysone), theta_sketch_agg(key2, 12, QUICKSELECT), alwaysone))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -1073,7 +1073,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 'max')) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, max, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 
max)) AS tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, max), theta_sketch_agg(key2, 12), max))#x] +Aggregate [tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, max, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), max)) AS tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, max), theta_sketch_agg(key2, 12, QUICKSELECT), max))#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -1085,7 +1085,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 'min')) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, min, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), min)) AS tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, min), theta_sketch_agg(key2, 12), min))#x] +Aggregate [tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, min, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), min)) AS tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, min), theta_sketch_agg(key2, 12, QUICKSELECT), min))#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -1097,7 +1097,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 'alwaysone')) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, alwaysone, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), alwaysone)) AS 
tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, alwaysone), theta_sketch_agg(key2, 12), alwaysone))#x] +Aggregate [tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, alwaysone, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), alwaysone)) AS tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, alwaysone), theta_sketch_agg(key2, 12, QUICKSELECT), alwaysone))#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -2077,7 +2077,7 @@ WithCTE : +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet :- CTERelationDef xxxx, false : +- SubqueryAlias empty_theta -: +- Aggregate [theta_sketch_agg(key2#x, 12, 0, 0) AS theta_sketch#x] +: +- Aggregate [theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0) AS theta_sketch#x] : +- Filter (key2#x > 100) : +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 : +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -2112,7 +2112,7 @@ WithCTE : +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet :- CTERelationDef xxxx, false : +- SubqueryAlias non_empty_theta -: +- Aggregate [theta_sketch_agg(key2#x, 12, 0, 0) AS theta_sketch#x] +: +- Aggregate [theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0) AS theta_sketch#x] : +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 : +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet +- Project [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch#x, theta_sketch#x, 12, sum)) AS estimate#x, tuple_sketch_summary_double(tuple_union_theta_double(tuple_sketch#x, theta_sketch#x, 12, sum), sum) AS 
summary#x] @@ -2145,7 +2145,7 @@ WithCTE : +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet :- CTERelationDef xxxx, false : +- SubqueryAlias empty_theta -: +- Aggregate [theta_sketch_agg(key2#x, 12, 0, 0) AS theta_sketch#x] +: +- Aggregate [theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0) AS theta_sketch#x] : +- Filter (key2#x > 100) : +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 : +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -2177,7 +2177,7 @@ WithCTE : +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet :- CTERelationDef xxxx, false : +- SubqueryAlias empty_theta -: +- Aggregate [theta_sketch_agg(key2#x, 12, 0, 0) AS theta_sketch#x] +: +- Aggregate [theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0) AS theta_sketch#x] : +- Filter (key2#x > 100) : +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 : +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -2210,7 +2210,7 @@ WithCTE : +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet :- CTERelationDef xxxx, false : +- SubqueryAlias non_empty_theta -: +- Aggregate [theta_sketch_agg(key2#x, 12, 0, 0) AS theta_sketch#x] +: +- Aggregate [theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0) AS theta_sketch#x] : +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 : +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet +- Project [tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch#x, theta_sketch#x, sum)) AS estimate#x] @@ -2241,7 +2241,7 @@ WithCTE : +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet :- CTERelationDef xxxx, false : +- SubqueryAlias empty_theta -: +- Aggregate [theta_sketch_agg(key2#x, 12, 0, 0) 
AS theta_sketch#x] +: +- Aggregate [theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0) AS theta_sketch#x] : +- Filter (key2#x > 100) : +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 : +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -2275,7 +2275,7 @@ WithCTE : +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet :- CTERelationDef xxxx, false : +- SubqueryAlias empty_theta -: +- Aggregate [theta_sketch_agg(key2#x, 12, 0, 0) AS theta_sketch#x] +: +- Aggregate [theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0) AS theta_sketch#x] : +- Filter (key2#x > 100) : +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 : +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -2308,7 +2308,7 @@ WithCTE : +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet :- CTERelationDef xxxx, false : +- SubqueryAlias non_empty_theta -: +- Aggregate [theta_sketch_agg(key2#x, 12, 0, 0) AS theta_sketch#x] +: +- Aggregate [theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0) AS theta_sketch#x] : +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 : +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet +- Project [tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch#x, theta_sketch#x)) AS estimate#x] @@ -2341,7 +2341,7 @@ WithCTE : +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet :- CTERelationDef xxxx, false : +- SubqueryAlias empty_theta -: +- Aggregate [theta_sketch_agg(key2#x, 12, 0, 0) AS theta_sketch#x] +: +- Aggregate [theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0) AS theta_sketch#x] : +- Filter (key2#x > 100) : +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 : +- Relation 
spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -2377,7 +2377,7 @@ WithCTE : +- LocalRelation [key#x, val#x] :- CTERelationDef xxxx, false : +- SubqueryAlias theta_sketch_data -: +- Aggregate [theta_sketch_agg(key#x, 12, 0, 0) AS theta_sketch#x] +: +- Aggregate [theta_sketch_agg(key#x, 12, QUICKSELECT, 0, 0) AS theta_sketch#x] : +- SubqueryAlias tab : +- LocalRelation [key#x] +- Project [tuple_sketch_estimate_double(tuple_intersection_theta_double(tuple_sketch#x, theta_sketch#x, sum)) AS estimate#x, tuple_sketch_summary_double(tuple_intersection_theta_double(tuple_sketch#x, theta_sketch#x, sum), sum) AS summary#x] @@ -2412,7 +2412,7 @@ WithCTE : +- LocalRelation [key#x, val#x] :- CTERelationDef xxxx, false : +- SubqueryAlias theta_sketch_data -: +- Aggregate [theta_sketch_agg(key#x, 12, 0, 0) AS theta_sketch#x] +: +- Aggregate [theta_sketch_agg(key#x, 12, QUICKSELECT, 0, 0) AS theta_sketch#x] : +- SubqueryAlias tab : +- LocalRelation [key#x] +- Project [tuple_sketch_estimate_double(tuple_difference_theta_double(tuple_sketch#x, theta_sketch#x)) AS estimate#x, tuple_sketch_summary_double(tuple_difference_theta_double(tuple_sketch#x, theta_sketch#x), sum) AS summary#x] @@ -2447,7 +2447,7 @@ WithCTE : +- LocalRelation [key#x, val#x] :- CTERelationDef xxxx, false : +- SubqueryAlias theta_sketch_data -: +- Aggregate [theta_sketch_agg(key#x, 12, 0, 0) AS theta_sketch#x] +: +- Aggregate [theta_sketch_agg(key#x, 12, QUICKSELECT, 0, 0) AS theta_sketch#x] : +- SubqueryAlias tab : +- LocalRelation [key#x] +- Project [tuple_sketch_estimate_integer(tuple_intersection_theta_integer(tuple_sketch#x, theta_sketch#x, max)) AS estimate#x, tuple_sketch_summary_integer(tuple_intersection_theta_integer(tuple_sketch#x, theta_sketch#x, max), sum) AS summary#xL] @@ -2482,7 +2482,7 @@ WithCTE : +- LocalRelation [key#x, val#x] :- CTERelationDef xxxx, false : +- SubqueryAlias theta_sketch_data -: +- Aggregate [theta_sketch_agg(key#x, 12, 0, 0) AS 
theta_sketch#x] +: +- Aggregate [theta_sketch_agg(key#x, 12, QUICKSELECT, 0, 0) AS theta_sketch#x] : +- SubqueryAlias tab : +- LocalRelation [key#x] +- Project [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch#x, theta_sketch#x, 12, min)) AS estimate#x, tuple_sketch_summary_double(tuple_union_theta_double(tuple_sketch#x, theta_sketch#x, 12, min), min) AS summary#x] @@ -2517,7 +2517,7 @@ WithCTE : +- LocalRelation [key#x, val#x] :- CTERelationDef xxxx, false : +- SubqueryAlias theta_sketch_data -: +- Aggregate [theta_sketch_agg(key#x, 12, 0, 0) AS theta_sketch#x] +: +- Aggregate [theta_sketch_agg(key#x, 12, QUICKSELECT, 0, 0) AS theta_sketch#x] : +- SubqueryAlias tab : +- LocalRelation [key#x] +- Project [tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch#x, theta_sketch#x, 12, max)) AS estimate#x, tuple_sketch_summary_integer(tuple_union_theta_integer(tuple_sketch#x, theta_sketch#x, 12, max), max) AS summary#xL] @@ -3039,7 +3039,7 @@ SELECT tuple_sketch_estimate_double( second => theta_sketch_agg(key2))) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 12, sum)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12), 12, sum))#x] +Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 12, sum)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), 12, sum))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3052,7 +3052,7 @@ SELECT tuple_sketch_estimate_double( 
lgNomEntries => 14)) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 14, sum)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12), 14, sum))#x] +Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 14, sum)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), 14, sum))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3065,7 +3065,7 @@ SELECT tuple_sketch_estimate_double( mode => 'max')) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 12, max)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12), 12, max))#x] +Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 12, max)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), 12, max))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3079,7 +3079,7 @@ SELECT tuple_sketch_estimate_double( first => tuple_sketch_agg_double(key1, val1))) FROM t_int_double_1_5_through_7_11 -- !query 
analysis -Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 15, min)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12), 15, min))#x] +Aggregate [tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 15, min)) AS tuple_sketch_estimate_double(tuple_union_theta_double(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), 15, min))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3092,7 +3092,7 @@ SELECT tuple_sketch_estimate_integer( lgNomEntries => 14)) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate [tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 14, sum)) AS tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, sum), theta_sketch_agg(key2, 12), 14, sum))#x] +Aggregate [tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 14, sum)) AS tuple_sketch_estimate_integer(tuple_union_theta_integer(tuple_sketch_agg_integer(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), 14, sum))#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3651,7 +3651,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputType" : "\"STRING\"", "paramIndex" : "third", "requiredType" : "\"INT\"", - "sqlExpr" : 
"\"tuple_union_theta_double(tuple_sketch_agg_double(col1, 1.0, 12, sum), theta_sketch_agg(col2, 12), invalid, sum)\"" + "sqlExpr" : "\"tuple_union_theta_double(tuple_sketch_agg_double(col1, 1.0, 12, sum), theta_sketch_agg(col2, 12, QUICKSELECT), invalid, sum)\"" }, "queryContext" : [ { "objectType" : "", @@ -3679,7 +3679,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "messageParameters" : { "hint" : "", "msg" : "[TUPLE_INVALID_SKETCH_MODE] Invalid call to `tuple_union_theta_double`: mode 'invalid_mode' is not supported. Valid modes are: sum, min, max, alwaysone. SQLSTATE: 22023", - "sqlExpr" : "\"tuple_union_theta_double(tuple_sketch_agg_double(col1, 1.0, 12, sum), theta_sketch_agg(col2, 12), 12, invalid_mode)\"" + "sqlExpr" : "\"tuple_union_theta_double(tuple_sketch_agg_double(col1, 1.0, 12, sum), theta_sketch_agg(col2, 12, QUICKSELECT), 12, invalid_mode)\"" }, "queryContext" : [ { "objectType" : "", @@ -3804,7 +3804,7 @@ SELECT tuple_union_theta_integer( theta_sketch_agg(key2)) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_union_theta_integer(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), 12, sum) AS tuple_union_theta_integer(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12), 12, sum)#x] +Aggregate [tuple_union_theta_integer(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), 12, sum) AS tuple_union_theta_integer(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), 12, sum)#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3815,7 +3815,7 @@ SELECT tuple_intersection_theta_integer( theta_sketch_agg(key2)) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_intersection_theta_integer(tuple_sketch_agg_double(key1#x, val1#x, 12, 
sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0), sum) AS tuple_intersection_theta_integer(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12), sum)#x] +Aggregate [tuple_intersection_theta_integer(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0), sum) AS tuple_intersection_theta_integer(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT), sum)#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3826,7 +3826,7 @@ SELECT tuple_difference_theta_integer( theta_sketch_agg(key2)) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_difference_theta_integer(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, 0, 0)) AS tuple_difference_theta_integer(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12))#x] +Aggregate [tuple_difference_theta_integer(tuple_sketch_agg_double(key1#x, val1#x, 12, sum, 0, 0), theta_sketch_agg(key2#x, 12, QUICKSELECT, 0, 0)) AS tuple_difference_theta_integer(tuple_sketch_agg_double(key1, val1, 12, sum), theta_sketch_agg(key2, 12, QUICKSELECT))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3835,7 +3835,7 @@ Aggregate [tuple_difference_theta_integer(tuple_sketch_agg_double(key1#x, val1#x SELECT tuple_union_theta_double(CAST(NULL AS BINARY), theta_sketch_agg(key1)) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_union_theta_double(cast(null as binary), theta_sketch_agg(key1#x, 12, 0, 0), 12, sum) AS tuple_union_theta_double(CAST(NULL AS BINARY), theta_sketch_agg(key1, 12), 12, sum)#x] +Aggregate [tuple_union_theta_double(cast(null as binary), theta_sketch_agg(key1#x, 12, QUICKSELECT, 0, 0), 12, sum) 
AS tuple_union_theta_double(CAST(NULL AS BINARY), theta_sketch_agg(key1, 12, QUICKSELECT), 12, sum)#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3860,7 +3860,7 @@ Project [tuple_union_theta_integer(cast(null as binary), cast(null as binary), 1 SELECT tuple_intersection_theta_double(CAST(NULL AS BINARY), theta_sketch_agg(key1)) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_intersection_theta_double(cast(null as binary), theta_sketch_agg(key1#x, 12, 0, 0), sum) AS tuple_intersection_theta_double(CAST(NULL AS BINARY), theta_sketch_agg(key1, 12), sum)#x] +Aggregate [tuple_intersection_theta_double(cast(null as binary), theta_sketch_agg(key1#x, 12, QUICKSELECT, 0, 0), sum) AS tuple_intersection_theta_double(CAST(NULL AS BINARY), theta_sketch_agg(key1, 12, QUICKSELECT), sum)#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3885,7 +3885,7 @@ Project [tuple_intersection_theta_integer(cast(null as binary), cast(null as bin SELECT tuple_difference_theta_double(CAST(NULL AS BINARY), theta_sketch_agg(key1)) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_difference_theta_double(cast(null as binary), theta_sketch_agg(key1#x, 12, 0, 0)) AS tuple_difference_theta_double(CAST(NULL AS BINARY), theta_sketch_agg(key1, 12))#x] +Aggregate [tuple_difference_theta_double(cast(null as binary), theta_sketch_agg(key1#x, 12, QUICKSELECT, 0, 0)) AS tuple_difference_theta_double(CAST(NULL AS BINARY), theta_sketch_agg(key1, 12, QUICKSELECT))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3910,7 +3910,7 @@ Project [tuple_difference_theta_integer(cast(null as 
binary), cast(null as binar SELECT tuple_union_theta_double(theta_sketch_agg(key1), tuple_sketch_agg_double(key2, val2)) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_union_theta_double(theta_sketch_agg(key1#x, 12, 0, 0), tuple_sketch_agg_double(key2#x, val2#x, 12, sum, 0, 0), 12, sum) AS tuple_union_theta_double(theta_sketch_agg(key1, 12), tuple_sketch_agg_double(key2, val2, 12, sum), 12, sum)#x] +Aggregate [tuple_union_theta_double(theta_sketch_agg(key1#x, 12, QUICKSELECT, 0, 0), tuple_sketch_agg_double(key2#x, val2#x, 12, sum, 0, 0), 12, sum) AS tuple_union_theta_double(theta_sketch_agg(key1, 12, QUICKSELECT), tuple_sketch_agg_double(key2, val2, 12, sum), 12, sum)#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3919,7 +3919,7 @@ Aggregate [tuple_union_theta_double(theta_sketch_agg(key1#x, 12, 0, 0), tuple_sk SELECT tuple_union_theta_integer(theta_sketch_agg(key1), tuple_sketch_agg_integer(key2, val2)) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate [tuple_union_theta_integer(theta_sketch_agg(key1#x, 12, 0, 0), tuple_sketch_agg_integer(key2#x, val2#x, 12, sum, 0, 0), 12, sum) AS tuple_union_theta_integer(theta_sketch_agg(key1, 12), tuple_sketch_agg_integer(key2, val2, 12, sum), 12, sum)#x] +Aggregate [tuple_union_theta_integer(theta_sketch_agg(key1#x, 12, QUICKSELECT, 0, 0), tuple_sketch_agg_integer(key2#x, val2#x, 12, sum, 0, 0), 12, sum) AS tuple_union_theta_integer(theta_sketch_agg(key1, 12, QUICKSELECT), tuple_sketch_agg_integer(key2, val2, 12, sum), 12, sum)#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3928,7 +3928,7 @@ Aggregate [tuple_union_theta_integer(theta_sketch_agg(key1#x, 12, 0, 0), tuple_s SELECT tuple_intersection_theta_double(theta_sketch_agg(key1), 
tuple_sketch_agg_double(key2, val2)) FROM t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_intersection_theta_double(theta_sketch_agg(key1#x, 12, 0, 0), tuple_sketch_agg_double(key2#x, val2#x, 12, sum, 0, 0), sum) AS tuple_intersection_theta_double(theta_sketch_agg(key1, 12), tuple_sketch_agg_double(key2, val2, 12, sum), sum)#x] +Aggregate [tuple_intersection_theta_double(theta_sketch_agg(key1#x, 12, QUICKSELECT, 0, 0), tuple_sketch_agg_double(key2#x, val2#x, 12, sum, 0, 0), sum) AS tuple_intersection_theta_double(theta_sketch_agg(key1, 12, QUICKSELECT), tuple_sketch_agg_double(key2, val2, 12, sum), sum)#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3937,7 +3937,7 @@ Aggregate [tuple_intersection_theta_double(theta_sketch_agg(key1#x, 12, 0, 0), t SELECT tuple_intersection_theta_integer(theta_sketch_agg(key1), tuple_sketch_agg_integer(key2, val2)) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate [tuple_intersection_theta_integer(theta_sketch_agg(key1#x, 12, 0, 0), tuple_sketch_agg_integer(key2#x, val2#x, 12, sum, 0, 0), sum) AS tuple_intersection_theta_integer(theta_sketch_agg(key1, 12), tuple_sketch_agg_integer(key2, val2, 12, sum), sum)#x] +Aggregate [tuple_intersection_theta_integer(theta_sketch_agg(key1#x, 12, QUICKSELECT, 0, 0), tuple_sketch_agg_integer(key2#x, val2#x, 12, sum, 0, 0), sum) AS tuple_intersection_theta_integer(theta_sketch_agg(key1, 12, QUICKSELECT), tuple_sketch_agg_integer(key2, val2, 12, sum), sum)#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3946,7 +3946,7 @@ Aggregate [tuple_intersection_theta_integer(theta_sketch_agg(key1#x, 12, 0, 0), SELECT tuple_difference_theta_double(theta_sketch_agg(key1), tuple_sketch_agg_double(key2, val2)) FROM 
t_int_double_1_5_through_7_11 -- !query analysis -Aggregate [tuple_difference_theta_double(theta_sketch_agg(key1#x, 12, 0, 0), tuple_sketch_agg_double(key2#x, val2#x, 12, sum, 0, 0)) AS tuple_difference_theta_double(theta_sketch_agg(key1, 12), tuple_sketch_agg_double(key2, val2, 12, sum))#x] +Aggregate [tuple_difference_theta_double(theta_sketch_agg(key1#x, 12, QUICKSELECT, 0, 0), tuple_sketch_agg_double(key2#x, val2#x, 12, sum, 0, 0)) AS tuple_difference_theta_double(theta_sketch_agg(key1, 12, QUICKSELECT), tuple_sketch_agg_double(key2, val2, 12, sum))#x] +- SubqueryAlias spark_catalog.default.t_int_double_1_5_through_7_11 +- Relation spark_catalog.default.t_int_double_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet @@ -3955,7 +3955,7 @@ Aggregate [tuple_difference_theta_double(theta_sketch_agg(key1#x, 12, 0, 0), tup SELECT tuple_difference_theta_integer(theta_sketch_agg(key1), tuple_sketch_agg_integer(key2, val2)) FROM t_int_int_1_5_through_7_11 -- !query analysis -Aggregate [tuple_difference_theta_integer(theta_sketch_agg(key1#x, 12, 0, 0), tuple_sketch_agg_integer(key2#x, val2#x, 12, sum, 0, 0)) AS tuple_difference_theta_integer(theta_sketch_agg(key1, 12), tuple_sketch_agg_integer(key2, val2, 12, sum))#x] +Aggregate [tuple_difference_theta_integer(theta_sketch_agg(key1#x, 12, QUICKSELECT, 0, 0), tuple_sketch_agg_integer(key2#x, val2#x, 12, sum, 0, 0)) AS tuple_difference_theta_integer(theta_sketch_agg(key1, 12, QUICKSELECT), tuple_sketch_agg_integer(key2, val2, 12, sum))#x] +- SubqueryAlias spark_catalog.default.t_int_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_int_1_5_through_7_11[key1#x,val1#x,key2#x,val2#x] parquet diff --git a/sql/core/src/test/resources/sql-tests/inputs/thetasketch.sql b/sql/core/src/test/resources/sql-tests/inputs/thetasketch.sql index 4782d2017f2a6..e4e78c57fc2e8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/thetasketch.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/thetasketch.sql @@ -465,6 
+465,10 @@ FROM VALUES (15), (16), (17) tab(col); SELECT theta_sketch_agg(col, CAST(col AS INT)) AS lg_nom_entries_non_constant FROM VALUES (15), (16), (17) tab(col); +-- family parameter is not foldable (non-constant) +SELECT theta_sketch_agg(col, 12, CAST(col AS STRING)) AS family_non_constant +FROM VALUES ('QUICKSELECT'), ('ALPHA'), ('QUICKSELECT') tab(col); + -- lgNomEntries parameter has wrong type (STRING instead of INT) SELECT theta_sketch_agg(col, '15') FROM VALUES (50), (60), (60) tab(col); diff --git a/sql/core/src/test/resources/sql-tests/results/thetasketch.sql.out b/sql/core/src/test/resources/sql-tests/results/thetasketch.sql.out index f98ed64da391b..f04287233872f 100644 --- a/sql/core/src/test/resources/sql-tests/results/thetasketch.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/thetasketch.sql.out @@ -182,7 +182,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_array_int_1_3_through_4_6 -- !query schema -struct +struct -- !query output 4 @@ -190,7 +190,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col2)) FROM t_array_long_1_3_through_4_6 -- !query schema -struct +struct -- !query output 4 @@ -198,7 +198,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_binary_a_b_through_e_f -- !query schema -struct +struct -- !query output 5 @@ -206,7 +206,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 5 @@ -214,7 +214,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col2)) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 5 @@ -222,7 +222,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1, 22)) FROM t_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 7 @@ -230,7 +230,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM 
t_long_1_5_through_7_11 -- !query schema -struct +struct -- !query output 7 @@ -238,7 +238,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_string_a_d_through_e_h -- !query schema -struct +struct -- !query output 5 @@ -249,7 +249,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11 @@ -260,7 +260,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 15), theta_sketch_agg(col2))) FROM t_long_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11 @@ -271,7 +271,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 8 @@ -282,7 +282,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 6), theta_sketch_agg(col2, 15), 15)) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 8 @@ -293,7 +293,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_string_a_d_through_e_h -- !query schema -struct +struct -- !query output 8 @@ -304,7 +304,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2), 20)) FROM t_binary_a_b_through_e_f -- !query schema -struct +struct -- !query output 6 @@ -315,7 +315,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_array_int_1_3_through_4_6 -- !query schema -struct +struct -- !query output 6 @@ -326,7 +326,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 13))) FROM t_array_long_1_3_through_4_6 -- !query schema -struct +struct -- !query output 6 @@ -337,7 +337,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 3 @@ -348,7 +348,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 5), 
theta_sketch_agg(col2, 12))) FROM t_long_1_5_through_7_11 -- !query schema -struct +struct -- !query output 3 @@ -359,7 +359,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 2 @@ -370,7 +370,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 5), theta_sketch_agg(col2))) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 2 @@ -381,7 +381,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_string_a_d_through_e_h -- !query schema -struct +struct -- !query output 2 @@ -392,7 +392,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 22))) FROM t_binary_a_b_through_e_f -- !query schema -struct +struct -- !query output 4 @@ -403,7 +403,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_array_int_1_3_through_4_6 -- !query schema -struct +struct -- !query output 2 @@ -414,7 +414,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 10))) FROM t_array_long_1_3_through_4_6 -- !query schema -struct +struct -- !query output 2 @@ -425,7 +425,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 4 @@ -436,7 +436,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 5))) FROM t_long_1_5_through_7_11 -- !query schema -struct +struct -- !query output 4 @@ -447,7 +447,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 3 @@ -458,7 +458,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 12), theta_sketch_agg(col2))) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 3 @@ -469,7 +469,7 @@ SELECT 
theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_string_a_d_through_e_h -- !query schema -struct +struct -- !query output 3 @@ -480,7 +480,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 6), theta_sketch_agg(col2, 8))) FROM t_binary_a_b_through_e_f -- !query schema -struct +struct -- !query output 1 @@ -491,7 +491,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_array_int_1_3_through_4_6 -- !query schema -struct +struct -- !query output 2 @@ -502,7 +502,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 4))) FROM t_array_long_1_3_through_4_6 -- !query schema -struct +struct -- !query output 2 @@ -687,7 +687,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (1), (null), (2), (null), (3) tab(col) -- !query schema -struct +struct -- !query output 3 @@ -696,7 +696,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES ('test'), (null), ('null'), (null) tab(col) -- !query schema -struct +struct -- !query output 2 @@ -705,7 +705,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (100L), (null), (200L), (null), (300L) tab(col) -- !query schema -struct +struct -- !query output 3 @@ -714,7 +714,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(CAST(col AS DOUBLE))) FROM VALUES (1.1), (null), (2.2), (null), (3.3) tab(col) -- !query schema -struct +struct -- !query output 3 @@ -723,7 +723,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(CAST(col AS FLOAT))) FROM VALUES (1.5), (null), (2.5), (null), (3.5) tab(col) -- !query schema -struct +struct -- !query output 3 @@ -732,7 +732,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (X'AA'), (null), (X'BB'), (null), (X'CC') tab(col) -- !query schema -struct +struct -- !query output 3 @@ -741,7 +741,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(1, 2)), (null), (ARRAY(3, 
4)), (null), (ARRAY(5, 6)) tab(col) -- !query schema -struct +struct -- !query output 3 @@ -750,7 +750,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(10L, 20L)), (null), (ARRAY(30L, 40L)), (null), (ARRAY(50L, 60L)) tab(col) -- !query schema -struct +struct -- !query output 3 @@ -759,7 +759,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(1, null)), (ARRAY(1)), (ARRAY(2, null, 3)), (ARRAY(4)) tab(col) -- !query schema -struct +struct -- !query output 4 @@ -768,7 +768,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(10L, null)), (ARRAY(10L)), (ARRAY(20L, null, 30L)), (ARRAY(40L)) tab(col) -- !query schema -struct +struct -- !query output 4 @@ -777,7 +777,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY()), (ARRAY(1, 2)), (ARRAY()), (ARRAY(3, 4)) tab(col) -- !query schema -struct +struct -- !query output 2 @@ -786,7 +786,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (''), ('a'), (''), ('b'), ('c') tab(col) -- !query schema -struct +struct -- !query output 3 @@ -795,7 +795,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (X''), (X'01'), (X'02'), (X'03'), (CAST(' ' AS BINARY)), (X'e280'), (X'c1'), (X'c120') tab(col) -- !query schema -struct +struct -- !query output 7 @@ -1011,6 +1011,22 @@ org.apache.spark.SparkRuntimeException } +-- !query +SELECT theta_sketch_agg(col, 12, CAST(col AS STRING)) AS family_non_constant +FROM VALUES ('QUICKSELECT'), ('ALPHA'), ('QUICKSELECT') tab(col) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkRuntimeException +{ + "errorClass" : "THETA_FAMILY_MUST_BE_CONSTANT", + "sqlState" : "42K0E", + "messageParameters" : { + "function" : "`theta_sketch_agg`" + } +} + + -- !query SELECT theta_sketch_agg(col, '15') FROM VALUES (50), (60), (60) tab(col) @@ -1026,7 +1042,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException 
"inputType" : "\"STRING\"", "paramIndex" : "second", "requiredType" : "\"INT\"", - "sqlExpr" : "\"theta_sketch_agg(col, 15)\"" + "sqlExpr" : "\"theta_sketch_agg(col, 15, QUICKSELECT)\"" }, "queryContext" : [ { "objectType" : "", @@ -1148,7 +1164,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputType" : "\"STRING\"", "paramIndex" : "third", "requiredType" : "\"INT\"", - "sqlExpr" : "\"theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), invalid)\"" + "sqlExpr" : "\"theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), invalid)\"" }, "queryContext" : [ { "objectType" : "", @@ -1180,7 +1196,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputType" : "\"STRING\"", "paramIndex" : "second", "requiredType" : "\"BINARY\"", - "sqlExpr" : "\"theta_intersection(theta_sketch_agg(col1, 12), invalid_sketch)\"" + "sqlExpr" : "\"theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), invalid_sketch)\"" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/tuplesketch.sql.out b/sql/core/src/test/resources/sql-tests/results/tuplesketch.sql.out index b009103d5f9f7..6af2637ed8952 100644 --- a/sql/core/src/test/resources/sql-tests/results/tuplesketch.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/tuplesketch.sql.out @@ -757,7 +757,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -769,7 +769,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 15)) FROM t_long_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -781,7 +781,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_double_double_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 8.0 @@ -793,7 +793,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 14)) FROM 
t_string_double_a_d_through_e_h -- !query schema -struct +struct -- !query output 8.0 @@ -805,7 +805,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 12, 'sum')) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -817,7 +817,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2))) FROM t_int_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -829,7 +829,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 12, 'sum')) FROM t_int_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -841,7 +841,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 3.0 @@ -853,7 +853,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_long_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 3.0 @@ -865,7 +865,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_double_double_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 2.0 @@ -877,7 +877,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_string_double_a_d_through_e_h -- !query schema -struct +struct -- !query output 2.0 @@ -889,7 +889,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 'min')) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 3.0 @@ -901,7 +901,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2))) FROM t_int_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 3.0 @@ -913,7 +913,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 'sum')) FROM t_int_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 3.0 @@ -925,7 +925,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query 
output 4.0 @@ -937,7 +937,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_long_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 4.0 @@ -949,7 +949,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_double_double_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 3.0 @@ -961,7 +961,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2))) FROM t_string_double_a_d_through_e_h -- !query schema -struct +struct -- !query output 3.0 @@ -973,7 +973,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2))) FROM t_int_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 4.0 @@ -985,7 +985,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 12, 'max')) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -997,7 +997,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 12, 'min')) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -1009,7 +1009,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 12, 'alwaysone')) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -1021,7 +1021,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 12, 'max')) FROM t_int_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -1033,7 +1033,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 12, 'min')) FROM t_int_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -1045,7 +1045,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 12, 'alwaysone')) FROM t_int_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -1057,7 +1057,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 'max')) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 3.0 
@@ -1069,7 +1069,7 @@ SELECT tuple_sketch_estimate_double( theta_sketch_agg(key2), 'alwaysone')) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 3.0 @@ -1081,7 +1081,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 'max')) FROM t_int_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 3.0 @@ -1093,7 +1093,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 'min')) FROM t_int_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 3.0 @@ -1105,7 +1105,7 @@ SELECT tuple_sketch_estimate_integer( theta_sketch_agg(key2), 'alwaysone')) FROM t_int_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 3.0 @@ -2601,7 +2601,7 @@ SELECT tuple_sketch_estimate_double( second => theta_sketch_agg(key2))) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -2614,7 +2614,7 @@ SELECT tuple_sketch_estimate_double( lgNomEntries => 14)) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -2627,7 +2627,7 @@ SELECT tuple_sketch_estimate_double( mode => 'max')) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -2641,7 +2641,7 @@ SELECT tuple_sketch_estimate_double( first => tuple_sketch_agg_double(key1, val1))) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -2654,7 +2654,7 @@ SELECT tuple_sketch_estimate_integer( lgNomEntries => 14)) FROM t_int_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11.0 @@ -3366,7 +3366,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputType" : "\"STRING\"", "paramIndex" : "third", "requiredType" : "\"INT\"", - "sqlExpr" : "\"tuple_union_theta_double(tuple_sketch_agg_double(col1, 1.0, 12, sum), theta_sketch_agg(col2, 12), invalid, sum)\"" + "sqlExpr" : "\"tuple_union_theta_double(tuple_sketch_agg_double(col1, 1.0, 12, sum), 
theta_sketch_agg(col2, 12, QUICKSELECT), invalid, sum)\"" }, "queryContext" : [ { "objectType" : "", @@ -3396,7 +3396,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "messageParameters" : { "hint" : "", "msg" : "[TUPLE_INVALID_SKETCH_MODE] Invalid call to `tuple_union_theta_double`: mode 'invalid_mode' is not supported. Valid modes are: sum, min, max, alwaysone. SQLSTATE: 22023", - "sqlExpr" : "\"tuple_union_theta_double(tuple_sketch_agg_double(col1, 1.0, 12, sum), theta_sketch_agg(col2, 12), 12, invalid_mode)\"" + "sqlExpr" : "\"tuple_union_theta_double(tuple_sketch_agg_double(col1, 1.0, 12, sum), theta_sketch_agg(col2, 12, QUICKSELECT), 12, invalid_mode)\"" }, "queryContext" : [ { "objectType" : "", @@ -3603,7 +3603,7 @@ org.apache.spark.SparkRuntimeException SELECT tuple_union_theta_double(CAST(NULL AS BINARY), theta_sketch_agg(key1)) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output NULL @@ -3629,7 +3629,7 @@ NULL SELECT tuple_intersection_theta_double(CAST(NULL AS BINARY), theta_sketch_agg(key1)) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output NULL @@ -3655,7 +3655,7 @@ NULL SELECT tuple_difference_theta_double(CAST(NULL AS BINARY), theta_sketch_agg(key1)) FROM t_int_double_1_5_through_7_11 -- !query schema -struct +struct -- !query output NULL diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index a55638b2431c5..c9f69e8a370ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -2817,7 +2817,7 @@ class DataFrameAggregateSuite extends QueryTest val res = sql(""" |select | id, - | theta_sketch_agg(value, 'text') + | theta_sketch_agg(value, 'text', 'ALPHA') |from | df1 |group by 1 @@ -2826,13 +2826,29 @@ class DataFrameAggregateSuite extends 
QueryTest }, condition = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", parameters = Map( - "sqlExpr" -> "\"theta_sketch_agg(value, text)\"", + "sqlExpr" -> "\"theta_sketch_agg(value, text, ALPHA)\"", "paramIndex" -> "second", "inputSql" -> "\"text\"", "inputType" -> "\"STRING\"", "requiredType" -> "\"INT\""), context = - ExpectedContext(fragment = "theta_sketch_agg(value, 'text')", start = 14, stop = 44)) + ExpectedContext(fragment = "theta_sketch_agg(value, 'text', 'ALPHA')", + start = 14, stop = 53)) + + // Test invalid family names + checkError( + exception = intercept[SparkRuntimeException] { + df1.groupBy("id") + .agg(theta_sketch_agg("value", 12, "INVALID_FAMILY").as("sketch")) + .collect() + }, + condition = "THETA_INVALID_FAMILY", + parameters = Map( + "function" -> "`theta_sketch_agg`", + "value" -> "'INVALID_FAMILY'", + "validFamilies" -> "`QUICKSELECT`, `ALPHA`" + ) + ) checkError( exception = intercept[AnalysisException] {