`crux-llvm` crashes (Input block type ... does not match expected) during SSA conversion #778

RyanGlScott · 2021-07-20T13:28:03Z

Consider this program, slightly minimized from SV-COMP's soft_float_1-2a.c.cil.c:

unsigned int addflt(unsigned int a , unsigned int b )
{
  unsigned int res ;
  unsigned int ma ;
  unsigned int mb ;
  unsigned int delta ;
  int ea ;
  int eb ;
  unsigned int tmp ;
  unsigned int __retres10 ;

  {
  if (a < b) {
    tmp = a;
    a = b;
    b = tmp;
  } else {

  }
  if (! b) {
    __retres10 = a;
    goto return_label;
  } else {

  }
  {
  ma = a & ((1U << 24U) - 1U);
  ea = (int )(a >> 24U) - 128;
  ma = ma | (1U << 24U);
  mb = b & ((1U << 24U) - 1U);
  eb = (int )(b >> 24U) - 128;
  mb = mb | (1U << 24U);
  delta = ea - eb;
  mb = mb >> delta;
  }
  if (! mb) {
    __retres10 = a;
    goto return_label;
  } else {

  }
  ma = ma + mb;
  if (ma & (1U << 25U)) {
    if (ea == 127) {
      __retres10 = 4294967295U;
      goto return_label;
    } else {

    }
    ea = ea + 1;
    ma = ma >> 1U;
  } else {

  }
  {
  ma = ma & ((1U << 24U) - 1U);
  res = ma | ((unsigned int )(ea + 128) << 24U);
  }
  __retres10 = res;
  return_label: /* CIL Label */
  return (__retres10);
}
}
int main(void)
{
  if (addflt(2,2)) {
    return 0;
  } else {
    return 1;
  }
}

Before commit 690d989, crux-llvm would verify this program. After commit 690d989, however, crux-llvm crashes:

$ cabal run exe:crux-llvm -- ../../../Crux/test.c 
Up to date
Input block type [IntrinsicRepr LLVM_pointer [BVRepr 32], IntrinsicRepr LLVM_pointer [BVRepr 32], IntrinsicRepr LLVM_pointer [BVRepr 32]] does not match expected [IntrinsicRepr LLVM_pointer [BVRepr 32], IntrinsicRepr LLVM_pointer [BVRepr 32]]:
while SSA converting function addflt
CallStack (from HasCallStack):
  error, called at src/Lang/Crucible/CFG/SSAConversion.hs:976:24 in crucible-0.5-inplace:Lang.Crucible.CFG.SSAConversion

The text was updated successfully, but these errors were encountered:

RyanGlScott · 2021-07-20T20:37:20Z

Some Debug.Trace debugging reveals that these are the inputs to the various basic blocks in the program above before the offending commit:

(%2,[$0, $1])
(%5,[r12, r13])
(%8,[r13, r15, r16, r22])
(%9,[])
(%11,[r15, r16, r26])
(%12,[r15, r26])
(%13,[r32, r33])
(%14,[r38])
(%15,[$0, $1])

Compare this to the basic block inputs after the offending commit:

(%2,[r20, $0, $1])
(%5,[r12, r13, r20])
(%8,[r13, r15, r16, r22])
(%9,[])
(%11,[r15, r16, r26])
(%12,[r15, r26])
(%13,[r32, r33])
(%14,[r38])
(%15,[r20, $0, $1])

Somehow, r20 has mysteriously snuck into the inputs! Here is the full CFG for addflt after the offending commit:

IntrinsicRepr LLVM_pointer [BVRepr 32] addflt ($0, $1)
%89
  $90 := r13
  r38 := $90
  jump %10
%91
  
  jump %5
%4
  $39 := dbg.value $0
  $40 := dbg.value $1
  $41 := extensionApp(pointerBlock $0)
  $42 := natLit(0)
  $43 := natEq($41, $42)
  $44 := stringLit("Expected bitvector, but found pointer")
  assert $43 $44
  $45 := extensionApp(pointerBlock $1)
  $46 := natLit(0)
  $47 := natEq($45, $46)
  $48 := stringLit("Expected bitvector, but found pointer")
  assert $47 $48
  $49 := natLit(0)
  $50 := extensionApp(pointerOffset $0)
  $51 := extensionApp(pointerOffset $1)
  $52 := bVUlt(32, $50, $51)
  $53 := boolToBV(1, $52)
  $54 := extensionApp(pointerExpr $49 $53)
  r11 := $54
  $55 := r11
  $56 := extensionApp(pointerBlock $55)
  $57 := natLit(0)
  $58 := natEq($56, $57)
  $59 := not($58)
  $60 := extensionApp(pointerOffset $55)
  $61 := bVNonzero(1, $60)
  $62 := or($59, $61)
  $63 := extensionApp(pointerIte $62 $0 $1)
  r12 := $63
  $64 := r11
  $65 := extensionApp(pointerBlock $64)
  $66 := natLit(0)
  $67 := natEq($65, $66)
  $68 := not($67)
  $69 := extensionApp(pointerOffset $64)
  $70 := bVNonzero(1, $69)
  $71 := or($68, $70)
  $72 := extensionApp(pointerIte $71 $1 $0)
  r13 := $72
  $73 := r13
  $74 := dbg.value $73
  $75 := r12
  $76 := dbg.value $75
  $77 := r12
  $78 := extensionApp(pointerBlock $77)
  $79 := natLit(0)
  $80 := natEq($78, $79)
  $81 := stringLit("Expected bitvector, but found pointer")
  assert $80 $81
  $82 := natLit(0)
  $83 := extensionApp(pointerOffset $77)
  $84 := bVLit(32, BV 0)
  $85 := baseIsEq(BaseBVRepr 32, $83, $84)
  $86 := boolToBV(1, $85)
  $87 := extensionApp(pointerExpr $82 $86)
  r14 := $87
  $88 := r14
  $92 := extensionApp(pointerBlock $88)
  $93 := natLit(0)
  $94 := natEq($92, $93)
  $95 := not($94)
  $96 := extensionApp(pointerOffset $88)
  $97 := bVNonzero(1, $96)
  $98 := or($95, $97)
  branch $98 %89 %91
%262
  $263 := r13
  r38 := $263
  jump %10
%264
  
  jump %6
%5
  $99 := r13
  $100 := dbg.value $99
  $101 := r13
  $102 := extensionApp(pointerBlock $101)
  $103 := natLit(0)
  $104 := natEq($102, $103)
  $105 := stringLit("Expected bitvector, but found pointer")
  assert $104 $105
  $106 := extensionApp(pointerOffset $101)
  $107 := bVLit(32, BV 24)
  $108 := bVLshr(32, $106, $107)
  $109 := bVLit(32, BV 24)
  $110 := bVShl(32, $108, $109)
  $111 := natLit(0)
  $112 := bVLit(32, BV 24)
  $113 := bVLit(32, BV 32)
  $114 := bVUlt(32, $112, $113)
  $115 := extensionApp(pointerOffset $101)
  $116 := bVLit(32, BV 24)
  $117 := extensionApp(sideConditions $108)
  $118 := extensionApp(pointerExpr $111 $117)
  r15 := $118
  $119 := r15
  $120 := extensionApp(pointerBlock $119)
  $121 := natLit(0)
  $122 := natEq($120, $121)
  $123 := stringLit("Expected bitvector, but found pointer")
  assert $122 $123
  $124 := extensionApp(pointerOffset $119)
  $125 := bVLit(32, BV 4294967168)
  $126 := bVAdd(32, $124, $125)
  $127 := natLit(0)
  $128 := extensionApp(pointerOffset $119)
  $129 := bVLit(32, BV 4294967168)
  $130 := bVSCarry(32, $128, $129)
  $131 := not($130)
  $132 := extensionApp(pointerOffset $119)
  $133 := bVLit(32, BV 4294967168)
  $134 := extensionApp(sideConditions $126)
  $135 := extensionApp(pointerExpr $127 $134)
  r16 := $135
  $136 := r16
  $137 := dbg.value $136
  $138 := r13
  $139 := dbg.value $138
  $140 := r12
  $141 := extensionApp(pointerBlock $140)
  $142 := natLit(0)
  $143 := natEq($141, $142)
  $144 := stringLit("Expected bitvector, but found pointer")
  assert $143 $144
  $145 := natLit(0)
  $146 := extensionApp(pointerOffset $140)
  $147 := bVLit(32, BV 16777215)
  $148 := bVAnd(32, $146, $147)
  $149 := extensionApp(pointerExpr $145 $148)
  r17 := $149
  $150 := r17
  $151 := dbg.value $150
  $152 := r12
  $153 := extensionApp(pointerBlock $152)
  $154 := natLit(0)
  $155 := natEq($153, $154)
  $156 := stringLit("Expected bitvector, but found pointer")
  assert $155 $156
  $157 := extensionApp(pointerOffset $152)
  $158 := bVLit(32, BV 24)
  $159 := bVLshr(32, $157, $158)
  $160 := bVLit(32, BV 24)
  $161 := bVShl(32, $159, $160)
  $162 := natLit(0)
  $163 := bVLit(32, BV 24)
  $164 := bVLit(32, BV 32)
  $165 := bVUlt(32, $163, $164)
  $166 := extensionApp(pointerOffset $152)
  $167 := bVLit(32, BV 24)
  $168 := extensionApp(sideConditions $159)
  $169 := extensionApp(pointerExpr $162 $168)
  r18 := $169
  $170 := r20
  $171 := dbg.value $170
  $172 := r17
  $173 := extensionApp(pointerBlock $172)
  $174 := natLit(0)
  $175 := natEq($173, $174)
  $176 := stringLit("Expected bitvector, but found pointer")
  assert $175 $176
  $177 := natLit(0)
  $178 := extensionApp(pointerOffset $172)
  $179 := bVLit(32, BV 16777216)
  $180 := bVOr(32, $178, $179)
  $181 := extensionApp(pointerExpr $177 $180)
  r19 := $181
  $182 := r19
  $183 := dbg.value $182
  $184 := r18
  $185 := extensionApp(pointerBlock $184)
  $186 := natLit(0)
  $187 := natEq($185, $186)
  $188 := stringLit("Expected bitvector, but found pointer")
  assert $187 $188
  $189 := bVLit(32, BV 128)
  $190 := extensionApp(pointerOffset $184)
  $191 := bVSub(32, $189, $190)
  $192 := natLit(0)
  $193 := bVLit(32, BV 128)
  $194 := extensionApp(pointerOffset $184)
  $195 := bVSBorrow(32, $193, $194)
  $196 := not($195)
  $197 := bVLit(32, BV 128)
  $198 := extensionApp(pointerOffset $184)
  $199 := extensionApp(sideConditions $191)
  $200 := extensionApp(pointerExpr $192 $199)
  r20 := $200
  $201 := r20
  $202 := r16
  $203 := extensionApp(pointerBlock $201)
  $204 := natLit(0)
  $205 := natEq($203, $204)
  $206 := stringLit("Expected bitvector, but found pointer")
  assert $205 $206
  $207 := extensionApp(pointerBlock $202)
  $208 := natLit(0)
  $209 := natEq($207, $208)
  $210 := stringLit("Expected bitvector, but found pointer")
  assert $209 $210
  $211 := extensionApp(pointerOffset $201)
  $212 := extensionApp(pointerOffset $202)
  $213 := bVAdd(32, $211, $212)
  $214 := natLit(0)
  $215 := extensionApp(pointerOffset $201)
  $216 := extensionApp(pointerOffset $202)
  $217 := bVSCarry(32, $215, $216)
  $218 := not($217)
  $219 := extensionApp(pointerOffset $201)
  $220 := extensionApp(pointerOffset $202)
  $221 := extensionApp(sideConditions $213)
  $222 := extensionApp(pointerExpr $214 $221)
  r21 := $222
  $223 := r21
  $224 := dbg.value $223
  $225 := r19
  $226 := r21
  $227 := extensionApp(pointerBlock $225)
  $228 := natLit(0)
  $229 := natEq($227, $228)
  $230 := stringLit("Expected bitvector, but found pointer")
  assert $229 $230
  $231 := extensionApp(pointerBlock $226)
  $232 := natLit(0)
  $233 := natEq($231, $232)
  $234 := stringLit("Expected bitvector, but found pointer")
  assert $233 $234
  $235 := extensionApp(pointerOffset $225)
  $236 := extensionApp(pointerOffset $226)
  $237 := bVLshr(32, $235, $236)
  $238 := extensionApp(pointerOffset $226)
  $239 := bVShl(32, $237, $238)
  $240 := natLit(0)
  $241 := extensionApp(pointerOffset $226)
  $242 := bVLit(32, BV 32)
  $243 := bVUlt(32, $241, $242)
  $244 := extensionApp(pointerOffset $225)
  $245 := extensionApp(pointerOffset $226)
  $246 := extensionApp(sideConditions $237)
  $247 := extensionApp(pointerExpr $240 $246)
  r22 := $247
  $248 := r22
  $249 := dbg.value $248
  $250 := r22
  $251 := extensionApp(pointerBlock $250)
  $252 := natLit(0)
  $253 := natEq($251, $252)
  $254 := stringLit("Expected bitvector, but found pointer")
  assert $253 $254
  $255 := natLit(0)
  $256 := extensionApp(pointerOffset $250)
  $257 := bVLit(32, BV 0)
  $258 := baseIsEq(BaseBVRepr 32, $256, $257)
  $259 := boolToBV(1, $258)
  $260 := extensionApp(pointerExpr $255 $259)
  r23 := $260
  $261 := r23
  $265 := extensionApp(pointerBlock $261)
  $266 := natLit(0)
  $267 := natEq($265, $266)
  $268 := not($267)
  $269 := extensionApp(pointerOffset $261)
  $270 := bVNonzero(1, $269)
  $271 := or($268, $270)
  branch $271 %262 %264
%337
  $338 := r26
  $339 := r16
  r32 := $338
  r33 := $339
  jump %9
%340
  
  jump %7
%6
  $272 := r13
  $273 := extensionApp(pointerBlock $272)
  $274 := natLit(0)
  $275 := natEq($273, $274)
  $276 := stringLit("Expected bitvector, but found pointer")
  assert $275 $276
  $277 := natLit(0)
  $278 := extensionApp(pointerOffset $272)
  $279 := bVLit(32, BV 16777215)
  $280 := bVAnd(32, $278, $279)
  $281 := extensionApp(pointerExpr $277 $280)
  r24 := $281
  $282 := r24
  $283 := dbg.value $282
  $284 := r24
  $285 := dbg.value $284
  $286 := r24
  $287 := extensionApp(pointerBlock $286)
  $288 := natLit(0)
  $289 := natEq($287, $288)
  $290 := stringLit("Expected bitvector, but found pointer")
  assert $289 $290
  $291 := natLit(0)
  $292 := extensionApp(pointerOffset $286)
  $293 := bVLit(32, BV 16777216)
  $294 := bVOr(32, $292, $293)
  $295 := extensionApp(pointerExpr $291 $294)
  r25 := $295
  $296 := r25
  $297 := dbg.value $296
  $298 := r22
  $299 := r25
  $300 := extensionApp(pointerBlock $298)
  $301 := natLit(0)
  $302 := natEq($300, $301)
  $303 := stringLit("Expected bitvector, but found pointer")
  assert $302 $303
  $304 := extensionApp(pointerBlock $299)
  $305 := natLit(0)
  $306 := natEq($304, $305)
  $307 := stringLit("Expected bitvector, but found pointer")
  assert $306 $307
  $308 := extensionApp(pointerOffset $298)
  $309 := extensionApp(pointerOffset $299)
  $310 := bVAdd(32, $308, $309)
  $311 := natLit(0)
  $312 := extensionApp(pointerExpr $311 $310)
  r26 := $312
  $313 := r26
  $314 := dbg.value $313
  $315 := r26
  $316 := extensionApp(pointerBlock $315)
  $317 := natLit(0)
  $318 := natEq($316, $317)
  $319 := stringLit("Expected bitvector, but found pointer")
  assert $318 $319
  $320 := natLit(0)
  $321 := extensionApp(pointerOffset $315)
  $322 := bVLit(32, BV 33554432)
  $323 := bVAnd(32, $321, $322)
  $324 := extensionApp(pointerExpr $320 $323)
  r27 := $324
  $325 := r27
  $326 := extensionApp(pointerBlock $325)
  $327 := natLit(0)
  $328 := natEq($326, $327)
  $329 := stringLit("Expected bitvector, but found pointer")
  assert $328 $329
  $330 := natLit(0)
  $331 := extensionApp(pointerOffset $325)
  $332 := bVLit(32, BV 0)
  $333 := baseIsEq(BaseBVRepr 32, $331, $332)
  $334 := boolToBV(1, $333)
  $335 := extensionApp(pointerExpr $330 $334)
  r28 := $335
  $336 := r28
  $341 := extensionApp(pointerBlock $336)
  $342 := natLit(0)
  $343 := natEq($341, $342)
  $344 := not($343)
  $345 := extensionApp(pointerOffset $336)
  $346 := bVNonzero(1, $345)
  $347 := or($344, $346)
  branch $347 %337 %340
%360
  $361 := natLit(0)
  $362 := bVLit(32, BV 4294967295)
  $363 := extensionApp(pointerExpr $361 $362)
  r38 := $363
  jump %10
%364
  
  jump %8
%7
  $348 := r16
  $349 := extensionApp(pointerBlock $348)
  $350 := natLit(0)
  $351 := natEq($349, $350)
  $352 := stringLit("Expected bitvector, but found pointer")
  assert $351 $352
  $353 := natLit(0)
  $354 := extensionApp(pointerOffset $348)
  $355 := bVLit(32, BV 127)
  $356 := baseIsEq(BaseBVRepr 32, $354, $355)
  $357 := boolToBV(1, $356)
  $358 := extensionApp(pointerExpr $353 $357)
  r29 := $358
  $359 := r29
  $365 := extensionApp(pointerBlock $359)
  $366 := natLit(0)
  $367 := natEq($365, $366)
  $368 := not($367)
  $369 := extensionApp(pointerOffset $359)
  $370 := bVNonzero(1, $369)
  $371 := or($368, $370)
  branch $371 %360 %364
%8
  $372 := r15
  $373 := extensionApp(pointerBlock $372)
  $374 := natLit(0)
  $375 := natEq($373, $374)
  $376 := stringLit("Expected bitvector, but found pointer")
  assert $375 $376
  $377 := extensionApp(pointerOffset $372)
  $378 := bVLit(32, BV 4294967169)
  $379 := bVAdd(32, $377, $378)
  $380 := natLit(0)
  $381 := extensionApp(pointerOffset $372)
  $382 := bVLit(32, BV 4294967169)
  $383 := bVSCarry(32, $381, $382)
  $384 := not($383)
  $385 := extensionApp(pointerOffset $372)
  $386 := bVLit(32, BV 4294967169)
  $387 := extensionApp(sideConditions $379)
  $388 := extensionApp(pointerExpr $380 $387)
  r30 := $388
  $389 := r30
  $390 := dbg.value $389
  $391 := r26
  $392 := extensionApp(pointerBlock $391)
  $393 := natLit(0)
  $394 := natEq($392, $393)
  $395 := stringLit("Expected bitvector, but found pointer")
  assert $394 $395
  $396 := extensionApp(pointerOffset $391)
  $397 := bVLit(32, BV 1)
  $398 := bVLshr(32, $396, $397)
  $399 := bVLit(32, BV 1)
  $400 := bVShl(32, $398, $399)
  $401 := natLit(0)
  $402 := bVLit(32, BV 1)
  $403 := bVLit(32, BV 32)
  $404 := bVUlt(32, $402, $403)
  $405 := extensionApp(pointerOffset $391)
  $406 := bVLit(32, BV 1)
  $407 := extensionApp(sideConditions $398)
  $408 := extensionApp(pointerExpr $401 $407)
  r31 := $408
  $409 := r31
  $410 := dbg.value $409
  $411 := r31
  $412 := r30
  r32 := $411
  r33 := $412
  jump %9
%9
  $413 := r33
  $414 := dbg.value $413
  $415 := r32
  $416 := dbg.value $415
  $417 := r32
  $418 := extensionApp(pointerBlock $417)
  $419 := natLit(0)
  $420 := natEq($418, $419)
  $421 := stringLit("Expected bitvector, but found pointer")
  assert $420 $421
  $422 := natLit(0)
  $423 := extensionApp(pointerOffset $417)
  $424 := bVLit(32, BV 16777215)
  $425 := bVAnd(32, $423, $424)
  $426 := extensionApp(pointerExpr $422 $425)
  r34 := $426
  $427 := r34
  $428 := dbg.value $427
  $429 := r33
  $430 := extensionApp(pointerBlock $429)
  $431 := natLit(0)
  $432 := natEq($430, $431)
  $433 := stringLit("Expected bitvector, but found pointer")
  assert $432 $433
  $434 := extensionApp(pointerOffset $429)
  $435 := bVLit(32, BV 24)
  $436 := bVShl(32, $434, $435)
  $437 := bVLit(32, BV 24)
  $438 := bVLshr(32, $436, $437)
  $439 := bVLit(32, BV 24)
  $440 := bVAshr(32, $436, $439)
  $441 := natLit(0)
  $442 := bVLit(32, BV 24)
  $443 := bVLit(32, BV 32)
  $444 := bVUlt(32, $442, $443)
  $445 := extensionApp(pointerOffset $429)
  $446 := bVLit(32, BV 24)
  $447 := extensionApp(sideConditions $436)
  $448 := extensionApp(pointerExpr $441 $447)
  r35 := $448
  $449 := r35
  $450 := r34
  $451 := extensionApp(pointerBlock $449)
  $452 := natLit(0)
  $453 := natEq($451, $452)
  $454 := stringLit("Expected bitvector, but found pointer")
  assert $453 $454
  $455 := extensionApp(pointerBlock $450)
  $456 := natLit(0)
  $457 := natEq($455, $456)
  $458 := stringLit("Expected bitvector, but found pointer")
  assert $457 $458
  $459 := natLit(0)
  $460 := extensionApp(pointerOffset $449)
  $461 := extensionApp(pointerOffset $450)
  $462 := bVOr(32, $460, $461)
  $463 := extensionApp(pointerExpr $459 $462)
  r36 := $463
  $464 := r36
  $465 := extensionApp(pointerBlock $464)
  $466 := natLit(0)
  $467 := natEq($465, $466)
  $468 := stringLit("Expected bitvector, but found pointer")
  assert $467 $468
  $469 := natLit(0)
  $470 := extensionApp(pointerOffset $464)
  $471 := bVLit(32, BV 2147483648)
  $472 := bVXor(32, $470, $471)
  $473 := extensionApp(pointerExpr $469 $472)
  r37 := $473
  $474 := r37
  $475 := dbg.value $474
  $476 := r37
  $477 := dbg.value $476
  $478 := r37
  r38 := $478
  jump %10
%10
  $479 := r38
  $480 := dbg.value $479
  $481 := r38
  $482 := popFrame crux:llvm_memory
  return $481
%2
  $3 := pushFrame addflt crux:llvm_memory
  jump %4

Indeed, we can see that r20 is referenced ($170 := r20; $171 := dbg.value $170) before it is assigned (r20 := $200). I'm very unclear on how this happens, however.

robdockins · 2021-07-20T20:39:03Z

Hummm... the new debug statements might not be playing nicely with the SSA analysis for some reason...

EDIT: nevermind, we're looking at the pre-SSA translation.

robdockins · 2021-07-20T20:56:57Z

Well, here's a chunk of the LLVM assembly from this program:

; <label>:7:                                      ; preds = %2
  %8 = lshr i32 %5, 24, !dbg !35
  %9 = add nsw i32 %8, -128, !dbg !37
  call void @llvm.dbg.value(metadata i32 %9, metadata !22, metadata !DIExpression()), !dbg !38
  %10 = and i32 %4, 16777215, !dbg !39
  call void @llvm.dbg.value(metadata i32 %10, metadata !20, metadata !DIExpression()), !dbg !40
  %11 = lshr i32 %4, 24, !dbg !41
  call void @llvm.dbg.value(metadata i32 %13, metadata !23, metadata !DIExpression()), !dbg !42
  %12 = or i32 %10, 16777216, !dbg !43
  call void @llvm.dbg.value(metadata i32 %12, metadata !20, metadata !DIExpression()), !dbg !40
  %13 = sub nsw i32 128, %11, !dbg !44
  %14 = add nsw i32 %13, %9, !dbg !45
  call void @llvm.dbg.value(metadata i32 %14, metadata !21, metadata !DIExpression()), !dbg !46
  %15 = lshr i32 %12, %14, !dbg !47
  call void @llvm.dbg.value(metadata i32 %15, metadata !20, metadata !DIExpression()), !dbg !40
  %16 = icmp eq i32 %15, 0, !dbg !48
  br i1 %16, label %35, label %17, !dbg !50

Notice that %13 is used before it is defined.... sigh

robdockins · 2021-07-20T21:04:19Z

This looks like a clang bug to me, but maybe LLVM intends metadata values to be able to magically look forward in time to when values get defined.

robdockins · 2021-07-20T21:23:53Z

A couple of things come to mind:

We should really have better error messages for when SSA conversion fails like this; it is pretty much always due to a virtual register being used before it is defined.
Use-before-def of an LLVM virtual register seems like something specific that we can check for inside crucible-llvm during translation. Loop-carried dependencies are mediated by phi nodes, so I believe we should never see a raw use-before-def situation inside a basic block.
We should file a clang bug about this. At least we can hope to discover if this is a genuine bug or very strange intended behavior.
Even if it is a clang bug, it will be awhile before we can convince the LLVM team to fix it and get a new release out. What do we want to do in the meantime?

robdockins · 2021-07-20T22:12:55Z

Here is a more minimal program showing the problematic behavior.

shrink.c

unsigned int addflt(unsigned int a , unsigned int b )
{
  unsigned int ma = a;
  unsigned int mb = b;
  int ea = (int)(a >> 24U) - 128;
  int eb = (int)(b >> 24U) - 128;
  unsigned int delta = ea - eb;

  mb = mb >> delta;
  ma = ma + mb;

  // This if statement seems critical
  if (ma) {
    ea = ea + 1;
  }

  return ((unsigned int) ea);
}

$ clang --version
Apple clang version 11.0.0 (clang-1100.0.33.17)
Target: x86_64-apple-darwin20.5.0
Thread model: posix
InstalledDir: /Library/Developer/CommandLineTools/usr/bin
$ clang -S -g -O1 -emit-llvm shrink.c

The relevant section of shrink.ll

; Function Attrs: norecurse nounwind readnone ssp uwtable
define i32 @addflt(i32, i32) local_unnamed_addr #0 !dbg !12 {
  call void @llvm.dbg.value(metadata i32 %0, metadata !16, metadata !DIExpression()), !dbg !23
  call void @llvm.dbg.value(metadata i32 %1, metadata !17, metadata !DIExpression()), !dbg !24
  call void @llvm.dbg.value(metadata i32 %0, metadata !18, metadata !DIExpression()), !dbg !25
  call void @llvm.dbg.value(metadata i32 %1, metadata !19, metadata !DIExpression()), !dbg !26
  %3 = lshr i32 %0, 24, !dbg !27
  %4 = add nsw i32 %3, -128, !dbg !28
  call void @llvm.dbg.value(metadata i32 %4, metadata !20, metadata !DIExpression()), !dbg !29
  %5 = lshr i32 %1, 24, !dbg !30
  call void @llvm.dbg.value(metadata i32 %6, metadata !21, metadata !DIExpression()), !dbg !31
  %6 = sub nsw i32 128, %5, !dbg !32
  %7 = add nsw i32 %6, %4, !dbg !33
  call void @llvm.dbg.value(metadata i32 %7, metadata !22, metadata !DIExpression()), !dbg !34
  %8 = lshr i32 %1, %7, !dbg !35
  call void @llvm.dbg.value(metadata i32 %8, metadata !19, metadata !DIExpression()), !dbg !26
  %9 = sub i32 0, %0, !dbg !36
  %10 = icmp eq i32 %8, %9, !dbg !36
  %11 = add nsw i32 %3, -127, !dbg !38
  %12 = select i1 %10, i32 %4, i32 %11, !dbg !40
  call void @llvm.dbg.value(metadata i32 %12, metadata !20, metadata !DIExpression()), !dbg !29
  ret i32 %12, !dbg !41
}

Here, %6 is used in a llvm.dbg.value call immediately before it is defined.

RyanGlScott · 2021-07-21T15:13:58Z

Excellent sleuthing. Also, oof.

We should file a clang bug about this. At least we can hope to discover if this is a genuine bug or very strange intended behavior.

I brought up this topic on the LLVM Discord, and since at least two different people were surprised by this behavior, I went ahead and filed a bug here.

Even if it is a clang bug, it will be awhile before we can convince the LLVM team to fix it and get a new release out. What do we want to do in the meantime?

An excellent question. I can think of various workarounds:

Do nothing, and tell users to work around the issue by specifying opt-level: 0, in which this bug does not seem to occur. This is a bit of a heavy-handed solution, however.
Work around the issue by allowing users to disable the use of the -g flag, which would prevent debug intrinsics from being generated in the first place. Again, this is a rather heavy-handed solution, and I'm unclear if removing the use of -g would have ramifications elsewhere.
Continue using -g, but work around the issue by allowing users to ignore debug intrinsic statements in Crucible. This is perhaps the least invasive option, as it would allow users to return to how Crucible behaved prior to commit 690d989.

Of those options, (3) sounds the most palatable to me.

travitch · 2021-07-21T15:16:11Z

How challenging would it be to recognize the offending statements and just ignore the ones that violate the SSA invariant?

RyanGlScott · 2021-07-21T15:28:59Z

Ah, that's an interesting idea—I'll call that option (4). @robdockins suggested in #778 (comment) that we should check for virtual register use-before-def errors during crucible-llvm translation anyway, so perhaps we can add a (hopefully temporary) special case for virtual registers that appear as an argument to llvm.dbg.value calls.

travitch · 2021-07-21T15:34:29Z

Just in practical terms, we might need to do the check in crucible-llvm somewhere before SSA conversion, because the SSA conversion doesn't get to know anything about the architecture extensions it is translating

robdockins · 2021-07-21T17:57:09Z

I think we should implement option 3 regardless; we probably want that to be the default for crux, since we aren't (currently?) making use of that debug information anyway.

Unfortunately, Clang can sometimes generate ill-scoped `llvm.dbg` statements (see #778), so this patch adds a `debug-intrinsics` option that controls whether `llvm.dbg` statements should be translated at all. The default value for `crux-llvm` is `no`, as `crux-llvm` does not yet make use of `llvm.dbg` information. We may wish to revisit this choice in the future should the upstream LLVM bug (https://bugs.llvm.org/show_bug.cgi?id=51155) be fixed. Fixes #778.

glguy · 2021-07-21T23:00:02Z

%6 is used in a metadata context ahead of time. Is there a way that things could be modeled differently in crucible so that we could solve this generically?

RyanGlScott · 2021-07-22T10:58:50Z

I'm not yet sure what a generic solution to this problem would look like. For what it's worth, there is a proposal to alter DBG_VALUE (the machine IR counterpart to llvm.dbg.value that is used after instruction selection in LLVM) which would permit DBG_VALUEs to be used-before-def. Here is an excerpt from the proposal:

I reckon debug-use-before-def's can be tolerated in this
representation, and even be well defined and useful, reducing the work
needed to be done earlier in the compiler. Under the model described
above, we can specify a program location before the corresponding
machine location containing the variable value machine location
becomes available. Consider this code:
DBG_VALUE output-of-this-inst ---
someinst1                        |
someinst2                        |
$rax = ADD32ri $rax, 0     <-----
Where the line from DBG_VALUE to ADD32ri represents some
as-yet-undetermined way of identifying the ADD32ri instruction from
the DBG_VALUE. We can interpret such a code sequence as the variable
having no location across someinst1 and someinst2, which are not
dominated by the defining instruction, then a location of $rax after
the ADD32ri. Essentially:

For an instruction dominated by a DBG_VALUE but not by the defining
instruction, the variable location is empty / undef / $noreg,

For an instruction dominated by both, the variable location is
defined as it is today.

This should work across control flow, and doesn't necessitate the
creation of DBG_VALUE $noreg's to explicitly describe unavailable
locations when instructions move. In theory, if we were to accept
debug use-before-defs in LLVM-IR, this would reduce analysis and mean
fewer dbg.value(undef,...)'s would need to be created earlier in the
compiler.

If I understand that correctly, the author of the proposal wants to move away from debug statements specifying machine locations with virtual registers and instead have them specify machine locations by way of their defining instructions. Under such a proposal, debug statements would be permitted to "look forward in time". (The proposal link above contains the full motivation for why this is a useful thing to have, but it's not directly relevant to this conversation.)

I mention this proposal because the author of the proposal has also suggested adopting a similar scheme for llvm.dbg.value statements in the LLVM IR—see Comment 3 on my bug report. To be clear, this proposal is not currently implemented, and I'd say that the jury is still out on whether LLVM IR programs such as the one in #778 (comment) are actually valid. Nevertheless, I thought I'd mention this as a possible direction that LLVM might eventually pursue.

Unfortunately, Clang can sometimes generate ill-scoped `llvm.dbg` statements (see #778), so this patch adds a `debug-intrinsics` option that controls whether `llvm.dbg` statements should be translated at all. The default value for `crux-llvm` is `no`, as `crux-llvm` does not yet make use of `llvm.dbg` information. We may wish to revisit this choice in the future should the upstream LLVM bug (https://bugs.llvm.org/show_bug.cgi?id=51155) be fixed. Fixes #778.

RyanGlScott · 2021-07-23T20:42:44Z

The popular opinion on https://bugs.llvm.org/show_bug.cgi?id=51155 seems to be that uses-before-defs in llvm.dbg statements are essentially expected behavior, despite not being documented as such yet. Moreover, this comment explicitly endorses a mitigation strategy much like the one proposed in #778 (comment). As such, I don't have much hope that the status quo will change in future versions of LLVM.

I propose that we go ahead with #782 and open a separate issue to discuss allowing programs like the one in #778 (comment) to work even when debugIntrinsics is set to True.

Unfortunately, Clang can sometimes generate ill-scoped `llvm.dbg` statements (see #778), so this patch adds a `debug-intrinsics` option that controls whether `llvm.dbg` statements should be translated at all. The default value for `crux-llvm` is `no`, as `crux-llvm` does not yet make use of `llvm.dbg` information. We may wish to revisit this choice in the future should the upstream LLVM bug (https://bugs.llvm.org/show_bug.cgi?id=51155) be fixed. Fixes #778.

RyanGlScott · 2021-08-04T13:21:54Z

See also #798, which is about making programs like the one in #778 (comment) work with debug-intrinsics enabled.

RyanGlScott added bug llvm crux labels Jul 20, 2021

RyanGlScott mentioned this issue Jul 21, 2021

Add debug-intrinsics option to control translating llvm.dbg statements #782

Merged

RyanGlScott mentioned this issue Jul 21, 2021

Adapt to debugIntrinsics option in crucible GaloisInc/saw-script#1388

Closed

RyanGlScott mentioned this issue Aug 4, 2021

crucible-llvm: Perform def/use analysis on llvm.dbg.value statements #798

Closed

RyanGlScott closed this as completed in #782 Aug 4, 2021

robdockins mentioned this issue Oct 14, 2021

Dbg def use #886

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

`crux-llvm` crashes (Input block type ... does not match expected) during SSA conversion #778

`crux-llvm` crashes (Input block type ... does not match expected) during SSA conversion #778

RyanGlScott commented Jul 20, 2021

RyanGlScott commented Jul 20, 2021

robdockins commented Jul 20, 2021 •

edited

Loading

robdockins commented Jul 20, 2021

robdockins commented Jul 20, 2021

robdockins commented Jul 20, 2021

robdockins commented Jul 20, 2021

RyanGlScott commented Jul 21, 2021

travitch commented Jul 21, 2021

RyanGlScott commented Jul 21, 2021 •

edited

Loading

travitch commented Jul 21, 2021

robdockins commented Jul 21, 2021

glguy commented Jul 21, 2021

RyanGlScott commented Jul 22, 2021

RyanGlScott commented Jul 23, 2021

RyanGlScott commented Aug 4, 2021

crux-llvm crashes (Input block type ... does not match expected) during SSA conversion #778

crux-llvm crashes (Input block type ... does not match expected) during SSA conversion #778

Comments

RyanGlScott commented Jul 20, 2021

RyanGlScott commented Jul 20, 2021

robdockins commented Jul 20, 2021 • edited Loading

robdockins commented Jul 20, 2021

robdockins commented Jul 20, 2021

robdockins commented Jul 20, 2021

robdockins commented Jul 20, 2021

RyanGlScott commented Jul 21, 2021

travitch commented Jul 21, 2021

RyanGlScott commented Jul 21, 2021 • edited Loading

travitch commented Jul 21, 2021

robdockins commented Jul 21, 2021

glguy commented Jul 21, 2021

RyanGlScott commented Jul 22, 2021

RyanGlScott commented Jul 23, 2021

RyanGlScott commented Aug 4, 2021

`crux-llvm` crashes (Input block type ... does not match expected) during SSA conversion #778

`crux-llvm` crashes (Input block type ... does not match expected) during SSA conversion #778

robdockins commented Jul 20, 2021 •

edited

Loading

RyanGlScott commented Jul 21, 2021 •

edited

Loading