diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index 9fd429d7c..62c958bfa 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -170,7 +170,7 @@ message JoinRel { JOIN_TYPE_RIGHT = 4; JOIN_TYPE_SEMI = 5; JOIN_TYPE_ANTI = 6; - // This join is useful for nested sub-queries where we need exactly one tuple in output (or throw exception) + // This join is useful for nested sub-queries where we need exactly one record in output (or throw exception) // See Section 3.2 of https://15721.courses.cs.cmu.edu/spring2018/papers/16-optimizer2/hyperjoins-btw2017.pdf JOIN_TYPE_SINGLE = 7; } @@ -437,7 +437,10 @@ message Rel { ExtensionMultiRel extension_multi = 10; ExtensionLeafRel extension_leaf = 11; CrossRel cross = 12; - //Physical relations + ReferenceRel reference = 21; + WriteRel write = 19; + DdlRel ddl = 20; + // Physical relations HashJoinRel hash_join = 13; MergeJoinRel merge_join = 14; NestedLoopJoinRel nested_loop_join = 18; @@ -509,7 +512,7 @@ message DdlRel { //TODO add PK/constraints/indexes/etc..? } -// The operator that modifies the content of a database (operates on 1 table at a time, but tuple-selection/source can be +// The operator that modifies the content of a database (operates on 1 table at a time, but record-selection/source can be // based on joining of multiple tables). message WriteRel { // Definition of which TABLE we are operating on @@ -524,7 +527,7 @@ message WriteRel { // The type of operation to perform WriteOp op = 4; - // The relation that determines the tuples to add/remove/modify + // The relation that determines the records to add/remove/modify // the schema must match with table_schema. Default values must be explicitly stated // in a ProjectRel at the top of the input. The match must also // occur in case of DELETE to ensure multi-engine plans are unequivocal. @@ -535,26 +538,26 @@ message WriteRel { enum WriteOp { WRITE_OP_UNSPECIFIED = 0; - // The insert of new tuples in a table + // The insert of new records in a table WRITE_OP_INSERT = 1; - // The removal of tuples from a table + // The removal of records from a table WRITE_OP_DELETE = 2; - // The modification of existing tuples within a table + // The modification of existing records within a table WRITE_OP_UPDATE = 3; - // The Creation of a new table, and the insert of new tuples in the table + // The Creation of a new table, and the insert of new records in the table WRITE_OP_CTAS = 4; } enum OutputMode { OUTPUT_MODE_UNSPECIFIED = 0; - // return no tuples at all + // return no records at all OUTPUT_MODE_NO_OUTPUT = 1; - // this mode makes the operator return all the tuple INSERTED/DELETED/UPDATED by the operator. + // this mode makes the operator return all the record INSERTED/DELETED/UPDATED by the operator. // The operator returns the AFTER-image of any change. This can be further manipulated by operators upstreams - // (e.g., retunring the typical "count of modified tuples"). + // (e.g., retunring the typical "count of modified records"). // For scenarios in which the BEFORE image is required, the user must implement a spool (via references to // subplans in the body of the Rel input) and return those with anounter PlanRel.relations. - OUTPUT_MODE_MODIFIED_TUPLES = 2; + OUTPUT_MODE_MODIFIED_RECORDS = 2; } } @@ -930,7 +933,7 @@ message Expression { // When one or more partition expressions are specified, two records are // considered to be in the same partition if and only if these expressions - // yield an equal tuple of values for both. When computing the window + // yield an equal record of values for both. When computing the window // function, only the subset of records within the bounds that are also in // the same partition as the current record are aggregated. repeated Expression partitions = 2; @@ -1409,10 +1412,10 @@ message AggregateFunction { // Use only distinct values in the aggregation calculation. AGGREGATION_INVOCATION_DISTINCT = 2; } +} - // This rel is used to create references, - // in case we refer to a RelRoot field names will be ignored - message ReferenceRel { - int32 subtree_ordinal = 1; - } +// This rel is used to create references, +// in case we refer to a RelRoot field names will be ignored +message ReferenceRel { + int32 subtree_ordinal = 1; } diff --git a/site/docs/relations/logical_relations.md b/site/docs/relations/logical_relations.md index 410802f3f..f5f79525c 100644 --- a/site/docs/relations/logical_relations.md +++ b/site/docs/relations/logical_relations.md @@ -375,14 +375,14 @@ doing `ReferenceRel(0) JOIN D`. This allows to avoid the redundancy of `A JOIN B ## Write Operator -The write operator is an operator that consumes one output and writes it to storage. This can range from writing to a Parquet file, to INSERT/DELETE/UPDATE in a database. +The write operator is an operator that consumes one input and writes it to storage. This can range from writing to a Parquet file, to INSERT/DELETE/UPDATE in a database. -| Signature | Value | -| -------------------- |---------------------------------------------------------| -| Inputs | 1 | -| Outputs | 1 | -| Property Maintenance | Output depends on OutputMode (none, or modified tuples) | -| Direct Output Order | Unchanged from input | +| Signature | Value | +| -------------------- |--------------------------------------------------------- | +| Inputs | 1 | +| Outputs | 1 | +| Property Maintenance | Output depends on OutputMode (none, or modified records) | +| Direct Output Order | Unchanged from input | ### Write Properties @@ -392,8 +392,8 @@ The write operator is an operator that consumes one output and writes it to stor | Write Type | Definition of which object we are operating on (e.g., a fully-qualified table name). | Required | | CTAS Schema | The names of all the columns and their type for a CREATE TABLE AS. | Required only for CTAS | | Write Operator | Which type of operation we are performing (INSERT/DELETE/UPDATE/CTAS). | Required | -| Rel Input | The Rel representing which tuples we will be operating on (e.g., VALUES for an INSERT, or which tuples to DELETE, or tuples and after-image of their values for UPDATE). | Required | -| Output Mode | For views that modify a DB it is important to control, which tuples to "return". Common default is NO_OUTPUT where we return nothing. Alternatively, we can return MODIFIED_TUPLES, that can be further manipulated by layering more rels ontop of this WriteRel (e.g., to "count how many tuples were updated"). This also allows to return the after-image of the change. To return before-image (or both) one can use the reference mechanisms and have multiple return values. | Required for VIEW CREATE/CREATE_OR_REPLACE/ALTER | +| Rel Input | The Rel representing which records we will be operating on (e.g., VALUES for an INSERT, or which records to DELETE, or records and after-image of their values for UPDATE). | Required | +| Output Mode | For views that modify a DB it is important to control which records to "return". Common default is NO_OUTPUT where we return nothing. Alternatively, we can return MODIFIED_RECORDS, that can be further manipulated by layering more rels ontop of this WriteRel (e.g., to "count how many records were updated"). This also allows to return the after-image of the change. To return before-image (or both) one can use the reference mechanisms and have multiple return values. | Required for VIEW CREATE/CREATE_OR_REPLACE/ALTER | ### Write Definition Types