@@ -9,7 +9,7 @@ use crate::cdsl::instructions::{
99} ;
1010use crate :: cdsl:: recipes:: { EncodingRecipe , EncodingRecipeNumber , Recipes } ;
1111use crate :: cdsl:: settings:: { SettingGroup , SettingPredicateNumber } ;
12- use crate :: cdsl:: types:: ValueType ;
12+ use crate :: cdsl:: types:: { LaneType , ValueType } ;
1313use crate :: shared:: types:: Bool :: { B1 , B16 , B32 , B64 , B8 } ;
1414use crate :: shared:: types:: Float :: { F32 , F64 } ;
1515use crate :: shared:: types:: Int :: { I16 , I32 , I64 , I8 } ;
@@ -1596,6 +1596,8 @@ pub fn define(
15961596 // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the
15971597 // value across the register
15981598
1599+ let allowed_simd_type = |t : & LaneType | t. lane_bits ( ) >= 8 && t. lane_bits ( ) < 128 ;
1600+
15991601 // PSHUFB, 8-bit shuffle using two XMM registers
16001602 for ty in ValueType :: all_lane_types ( ) . filter ( |t| t. lane_bits ( ) == 8 ) {
16011603 let number_of_lanes = 128 / ty. lane_bits ( ) ;
@@ -1617,7 +1619,7 @@ pub fn define(
16171619 // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
16181620 // to the Intel manual: "When the destination operand is an XMM register, the source operand is
16191621 // written to the low doubleword of the register and the register is zero-extended to 128 bits."
1620- for ty in ValueType :: all_lane_types ( ) . filter ( |t| t . lane_bits ( ) >= 8 ) {
1622+ for ty in ValueType :: all_lane_types ( ) . filter ( allowed_simd_type ) {
16211623 let number_of_lanes = 128 / ty. lane_bits ( ) ;
16221624 let instruction = scalar_to_vector. bind_vector ( ty, number_of_lanes) . bind ( ty) ;
16231625 let template = rec_frurm. opcodes ( vec ! [ 0x66 , 0x0f , 0x6e ] ) ; // MOVD/MOVQ
@@ -1657,8 +1659,9 @@ pub fn define(
16571659 }
16581660
16591661 // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8)
1660- for from_type in ValueType :: all_lane_types ( ) . filter ( |t| t. lane_bits ( ) >= 8 ) {
1661- for to_type in ValueType :: all_lane_types ( ) . filter ( |t| t. lane_bits ( ) >= 8 && * t != from_type)
1662+ for from_type in ValueType :: all_lane_types ( ) . filter ( allowed_simd_type) {
1663+ for to_type in
1664+ ValueType :: all_lane_types ( ) . filter ( |t| allowed_simd_type ( t) && * t != from_type)
16621665 {
16631666 let instruction = raw_bitcast
16641667 . bind_vector ( to_type, 128 / to_type. lane_bits ( ) )
0 commit comments