apache · alamb · Oct 14, 2025 · Oct 10, 2025 · Oct 14, 2025 · alamb
diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs
@@ -445,14 +445,16 @@ impl RecordBatch {
             })
             .collect::<Result<Vec<_>, _>>()?;
 
-        RecordBatch::try_new_with_options(
-            SchemaRef::new(projected_schema),
-            batch_fields,
-            &RecordBatchOptions {
-                match_field_names: true,
-                row_count: Some(self.row_count),
-            },
-        )
+        unsafe {
 // check that number of fields in schema match column length 
 if schema.fields().len() != columns.len() { 
     return Err(ArrowError::InvalidArgumentError(format!( 
         "number of columns({}) must match number of fields({}) in schema", 
         columns.len(), 
         schema.fields().len(), 
     ))); 
 } 
 let row_count = options 
     .row_count 
     .or_else(|| columns.first().map(|col| col.len())) 
     .ok_or_else(|| { 
         ArrowError::InvalidArgumentError( 
             "must either specify a row count or at least one column".to_string(), 
         ) 
     })?; 
 for (c, f) in columns.iter().zip(&schema.fields) { 
     if !f.is_nullable() && c.null_count() > 0 { 
         return Err(ArrowError::InvalidArgumentError(format!( 
             "Column '{}' is declared as non-nullable but contains null values", 
             f.name() 
         ))); 
     } 
 } 
 // check that all columns have the same row count 
 if columns.iter().any(|c| c.len() != row_count) { 
     let err = match options.row_count { 
         Some(_) => "all columns in a record batch must have the specified row count", 
         None => "all columns in a record batch must have the same length", 
     }; 
     return Err(ArrowError::InvalidArgumentError(err.to_string())); 
 } 
 // function for comparing column type and field type 
 // return true if 2 types are not matched 
 let type_not_match = if options.match_field_names { 
     |(_, (col_type, field_type)): &(usize, (&DataType, &DataType))| col_type != field_type 
 } else { 
     |(_, (col_type, field_type)): &(usize, (&DataType, &DataType))| { 
         !col_type.equals_datatype(field_type) 
     } 
 }; 
 // check that all columns match the schema 
 let not_match = columns 
     .iter() 
     .zip(schema.fields().iter()) 
     .map(|(col, field)| (col.data_type(), field.data_type())) 
     .enumerate() 
     .find(type_not_match); 
 if let Some((i, (col_type, field_type))) = not_match { 
     return Err(ArrowError::InvalidArgumentError(format!( 
         "column types must match schema types, expected {field_type} but found {col_type} at column index {i}" 
     ))); 
 } 
 // check that number of fields in schema match column length 
 if schema.fields().len() != columns.len() { 
     return Err(ArrowError::InvalidArgumentError(format!( 
         "number of columns({}) must match number of fields({}) in schema", 
         columns.len(), 
         schema.fields().len(), 
     ))); 
 } 
  
 let row_count = options 
     .row_count 
     .or_else(|| columns.first().map(|col| col.len())) 
     .ok_or_else(|| { 
         ArrowError::InvalidArgumentError( 
             "must either specify a row count or at least one column".to_string(), 
         ) 
     })?; 
  
 for (c, f) in columns.iter().zip(&schema.fields) { 
     if !f.is_nullable() && c.null_count() > 0 { 
         return Err(ArrowError::InvalidArgumentError(format!( 
             "Column '{}' is declared as non-nullable but contains null values", 
             f.name() 
         ))); 
     } 
 } 
  
 // check that all columns have the same row count 
 if columns.iter().any(|c| c.len() != row_count) { 
     let err = match options.row_count { 
         Some(_) => "all columns in a record batch must have the specified row count", 
         None => "all columns in a record batch must have the same length", 
     }; 
     return Err(ArrowError::InvalidArgumentError(err.to_string())); 
 } 
  
 // function for comparing column type and field type 
 // return true if 2 types are not matched 
 let type_not_match = if options.match_field_names { 
     |(_, (col_type, field_type)): &(usize, (&DataType, &DataType))| col_type != field_type 
 } else { 
     |(_, (col_type, field_type)): &(usize, (&DataType, &DataType))| { 
         !col_type.equals_datatype(field_type) 
     } 
 }; 
  
 // check that all columns match the schema 
 let not_match = columns 
     .iter() 
     .zip(schema.fields().iter()) 
     .map(|(col, field)| (col.data_type(), field.data_type())) 
     .enumerate() 
     .find(type_not_match); 
  
 if let Some((i, (col_type, field_type))) = not_match { 
     return Err(ArrowError::InvalidArgumentError(format!( 
         "column types must match schema types, expected {field_type} but found {col_type} at column index {i}" 
     ))); 
 } 
+            // Since we're starting from a valid RecordBatch and project
+            // creates a strict subset of the original, there's no need to
+            // redo the validation checks in `try_new_with_options`.
+            Ok(RecordBatch::new_unchecked(
+                SchemaRef::new(projected_schema),
+                batch_fields,
+                self.row_count,
+            ))
+        }
     }
 
     /// Normalize a semi-structured [`RecordBatch`] into a flat table.