Skip to content

Commit e8c33ba

Browse files
authored
Various improvements and fixes (#1817)
* Reduce log level for explorer API errors * Reduce log level for remote peer invalid open_channel * Don't send duplicate commands in PostRestartHtlcCleaner: if there is already a pending HTLC settlement command in the DB, the post restart handler should let the channel replay it instead of sending a conflicting command. * Workaround for lnd bug in reestablish: sometimes lnd sends announcement_signatures before sending their channel reestablish. This is a minor spec violation, we can simply delay the message and handle it later (hopefully once we've received their reestablish). * Log shared secrets in Sphinx error: Breez sometimes returns errors that we fail to parse. Unfortunately we didn't correctly log the shared secrets because the variable was shadowed, so we can't investigate further for now. * Fix utxo metric checks: if we're unable to fetch the number of unconfirmed parents for a utxo, this shouldn't cause the global utxo check to fail. We log a warning and let operations continue to ensure the metric is updated. * Handle ChannelIdAssigned when disconnected: there may be a race condition where a peer disconnect in the middle of a channel id assignment. In that case, we still want to record the up-to-date mapping.
1 parent 98cae45 commit e8c33ba

File tree

10 files changed

+114
-47
lines changed

10 files changed

+114
-47
lines changed

eclair-core/src/main/scala/fr/acinq/eclair/blockchain/bitcoind/ZmqWatcher.scala

+5-1
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,11 @@ private class ZmqWatcher(chainHash: ByteVector32, blockCount: AtomicLong, client
401401
def getUnconfirmedAncestorCount(utxo: Utxo): Future[(ByteVector32, Long)] = client.rpcClient.invoke("getmempoolentry", utxo.txId).map(json => {
402402
val JInt(ancestorCount) = json \ "ancestorcount"
403403
(utxo.txId, ancestorCount.toLong)
404-
})
404+
}).recover {
405+
case ex: Throwable =>
406+
log.warn(s"could not retrieve unconfirmed ancestor count for txId=${utxo.txId} amount=${utxo.amount}:", ex)
407+
(utxo.txId, 0)
408+
}
405409

406410
def getUnconfirmedAncestorCountMap(utxos: Seq[Utxo]): Future[Map[ByteVector32, Long]] = Future.sequence(utxos.filter(_.confirmations == 0).map(getUnconfirmedAncestorCount)).map(_.toMap)
407411

eclair-core/src/main/scala/fr/acinq/eclair/blockchain/watchdogs/ExplorerApi.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ object ExplorerApi {
8585
Behaviors.stopped
8686

8787
case WrappedFailure(e) =>
88-
context.log.error(s"${explorer.name} failed: ", e)
88+
context.log.warn(s"${explorer.name} failed: ", e)
8989
Metrics.WatchdogError.withTag(Tags.Source, explorer.name).increment()
9090
Behaviors.stopped
9191
}

eclair-core/src/main/scala/fr/acinq/eclair/channel/Channel.scala

+11-2
Original file line numberDiff line numberDiff line change
@@ -1443,7 +1443,7 @@ class Channel(val nodeParams: NodeParams, val wallet: EclairWallet, remoteNodeId
14431443
// finally, if one of the unilateral closes is done, we move to CLOSED state, otherwise we stay (note that we don't store the state)
14441444
closingType_opt match {
14451445
case Some(closingType) =>
1446-
log.info(s"channel closed (type=$closingType)")
1446+
log.info(s"channel closed (type=${closingType_opt.map(c => EventType.Closed(c).label).getOrElse("UnknownYet")})")
14471447
context.system.eventStream.publish(ChannelClosed(self, d.channelId, closingType, d.commitments))
14481448
goto(CLOSED) using d1 storing()
14491449
case None =>
@@ -1701,6 +1701,14 @@ class Channel(val nodeParams: NodeParams, val wallet: EclairWallet, remoteNodeId
17011701
peer ! Peer.Disconnect(remoteNodeId)
17021702
stay
17031703

1704+
// This handler is a workaround for an issue in lnd similar to the one above: they sometimes send announcement_signatures
1705+
// before channel_reestablish, which is a minor spec violation. It doesn't halt the channel, we can simply postpone
1706+
// that message.
1707+
case Event(remoteAnnSigs: AnnouncementSignatures, _) =>
1708+
log.warning("received announcement_signatures before channel_reestablish (known lnd bug): delaying...")
1709+
context.system.scheduler.scheduleOnce(5 seconds, self, remoteAnnSigs)
1710+
stay
1711+
17041712
case Event(c: CurrentBlockCount, d: HasCommitments) => handleNewBlock(c, d)
17051713

17061714
case Event(c: CurrentFeerates, d: HasCommitments) =>
@@ -2125,11 +2133,12 @@ class Channel(val nodeParams: NodeParams, val wallet: EclairWallet, remoteNodeId
21252133
private def handleLocalError(cause: Throwable, d: Data, msg: Option[Any]) = {
21262134
cause match {
21272135
case _: ForcedLocalCommit => log.warning(s"force-closing channel at user request")
2136+
case _ if stateName == WAIT_FOR_OPEN_CHANNEL => log.warning(s"${cause.getMessage} while processing msg=${msg.getOrElse("n/a").getClass.getSimpleName} in state=$stateName")
21282137
case _ => log.error(s"${cause.getMessage} while processing msg=${msg.getOrElse("n/a").getClass.getSimpleName} in state=$stateName")
21292138
}
21302139
cause match {
21312140
case _: ChannelException => ()
2132-
case _ => log.error(cause, s"msg=${msg.getOrElse("n/a")} stateData=$stateData ")
2141+
case _ => log.error(cause, s"msg=${msg.getOrElse("n/a")} stateData=$stateData")
21332142
}
21342143
val error = Error(d.channelId, cause.getMessage)
21352144
context.system.eventStream.publish(ChannelErrorOccurred(self, stateData.channelId, remoteNodeId, stateData, LocalError(cause), isFatal = true))

eclair-core/src/main/scala/fr/acinq/eclair/crypto/Sphinx.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,7 @@ object Sphinx extends Logging {
356356
require(packet.length == PacketLength, s"invalid error packet length ${packet.length}, must be $PacketLength")
357357

358358
@tailrec
359-
def loop(packet: ByteVector, sharedSecrets: Seq[(ByteVector32, PublicKey)]): DecryptedFailurePacket = sharedSecrets match {
359+
def loop(packet: ByteVector, secrets: Seq[(ByteVector32, PublicKey)]): DecryptedFailurePacket = secrets match {
360360
case Nil => throw new RuntimeException(s"couldn't parse error packet=$packet with sharedSecrets=$sharedSecrets")
361361
case (secret, pubkey) :: tail =>
362362
val packet1 = wrap(packet, secret)

eclair-core/src/main/scala/fr/acinq/eclair/io/Peer.scala

+7-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@
1616

1717
package fr.acinq.eclair.io
1818

19-
import akka.actor.typed
20-
import akka.actor.{Actor, ActorContext, ActorRef, ExtendedActorSystem, FSM, OneForOneStrategy, PossiblyHarmful, Props, Status, SupervisorStrategy, Terminated}
19+
import akka.actor.{Actor, ActorContext, ActorRef, ExtendedActorSystem, FSM, OneForOneStrategy, PossiblyHarmful, Props, Status, SupervisorStrategy, Terminated, typed}
2120
import akka.event.Logging.MDC
2221
import akka.event.{BusLogging, DiagnosticLoggingAdapter}
2322
import akka.util.Timeout
@@ -86,6 +85,12 @@ class Peer(val nodeParams: NodeParams, remoteNodeId: PublicKey, wallet: EclairWa
8685
stay using d.copy(channels = channels1)
8786
}
8887

88+
// This event is usually handled while we're connected, but if our peer disconnects right when we're emitting this,
89+
// we still want to record the channelId mapping.
90+
case Event(ChannelIdAssigned(channel, _, temporaryChannelId, channelId), d: DisconnectedData) =>
91+
log.info(s"channel id switch: previousId=$temporaryChannelId nextId=$channelId")
92+
stay using d.copy(channels = d.channels + (FinalChannelId(channelId) -> channel))
93+
8994
case Event(_: LightningMessage, _) => stay // we probably just got disconnected and that's the last messages we received
9095
}
9196

eclair-core/src/main/scala/fr/acinq/eclair/payment/relay/PostRestartHtlcCleaner.scala

+8-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,13 @@ class PostRestartHtlcCleaner(nodeParams: NodeParams, register: ActorRef, initial
7070
val nonStandardRelayedOutHtlcs: Map[Origin, Set[(ByteVector32, Long)]] = nodeParams.pluginParams.collect { case p: CustomCommitmentsPlugin => p.getHtlcsRelayedOut(htlcsIn, nodeParams, log) }.flatten.toMap
7171
val relayedOut: Map[Origin, Set[(ByteVector32, Long)]] = getHtlcsRelayedOut(channels, htlcsIn) ++ nonStandardRelayedOutHtlcs
7272

73-
val notRelayed = htlcsIn.filterNot(htlcIn => relayedOut.keys.exists(origin => matchesOrigin(htlcIn.add, origin)))
73+
val settledHtlcs: Set[(ByteVector32, Long)] = nodeParams.db.pendingCommands.listSettlementCommands().map { case (channelId, cmd) => (channelId, cmd.id) }.toSet
74+
val notRelayed = htlcsIn.filterNot(htlcIn => {
75+
// If an HTLC has been relayed and then settled downstream, it will not have a matching entry in relayedOut.
76+
// When that happens, there will be an HTLC settlement command in the pendingRelay DB, and we will let the channel
77+
// replay it instead of sending a conflicting command.
78+
relayedOut.keys.exists(origin => matchesOrigin(htlcIn.add, origin)) || settledHtlcs.contains((htlcIn.add.channelId, htlcIn.add.id))
79+
})
7480
cleanupRelayDb(htlcsIn, nodeParams.db.pendingCommands)
7581

7682
log.info(s"htlcsIn=${htlcsIn.length} notRelayed=${notRelayed.length} relayedOut=${relayedOut.values.flatten.size}")
@@ -332,6 +338,7 @@ object PostRestartHtlcCleaner {
332338
def groupByOrigin(htlcsOut: Seq[(Origin, ByteVector32, Long)], htlcsIn: Seq[IncomingHtlc]): Map[Origin, Set[(ByteVector32, Long)]] =
333339
htlcsOut
334340
.groupBy { case (origin, _, _) => origin }
341+
.view
335342
.mapValues(_.map { case (_, channelId, htlcId) => (channelId, htlcId) }.toSet)
336343
// We are only interested in HTLCs that are pending upstream (not fulfilled nor failed yet).
337344
// It may be the case that we have unresolved HTLCs downstream that have been resolved upstream when the downstream

eclair-core/src/main/scala/fr/acinq/eclair/payment/send/PaymentLifecycle.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ class PaymentLifecycle(nodeParams: NodeParams, cfg: SendPaymentConfig, router: A
177177
}
178178
RemoteFailure(cfg.fullRoute(route), e)
179179
case Failure(t) =>
180-
log.warning(s"cannot parse returned error: ${t.getMessage}")
180+
log.warning(s"cannot parse returned error ${fail.reason.toHex} with sharedSecrets=$sharedSecrets: ${t.getMessage}")
181181
UnreadableRemoteFailure(cfg.fullRoute(route))
182182
}
183183
log.warning(s"too many failed attempts, failing the payment")

eclair-core/src/test/scala/fr/acinq/eclair/channel/states/e/NormalStateSpec.scala

+21-2
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ import fr.acinq.eclair.Features.StaticRemoteKey
2424
import fr.acinq.eclair.TestConstants.{Alice, Bob}
2525
import fr.acinq.eclair.UInt64.Conversions._
2626
import fr.acinq.eclair._
27-
import fr.acinq.eclair.blockchain.{CurrentBlockCount, CurrentFeerates}
2827
import fr.acinq.eclair.blockchain.bitcoind.ZmqWatcher._
2928
import fr.acinq.eclair.blockchain.fee.{FeeratePerByte, FeeratePerKw, FeeratesPerKw}
29+
import fr.acinq.eclair.blockchain.{CurrentBlockCount, CurrentFeerates}
3030
import fr.acinq.eclair.channel.Channel._
3131
import fr.acinq.eclair.channel.TxPublisher.{PublishRawTx, PublishTx}
3232
import fr.acinq.eclair.channel._
@@ -39,7 +39,7 @@ import fr.acinq.eclair.router.Announcements
3939
import fr.acinq.eclair.transactions.DirectedHtlc.{incoming, outgoing}
4040
import fr.acinq.eclair.transactions.Transactions
4141
import fr.acinq.eclair.transactions.Transactions.{DefaultCommitmentFormat, HtlcSuccessTx, weight2fee}
42-
import fr.acinq.eclair.wire.protocol.{AnnouncementSignatures, ChannelUpdate, ClosingSigned, CommitSig, Error, FailureMessageCodecs, PermanentChannelFailure, RevokeAndAck, Shutdown, UpdateAddHtlc, UpdateFailHtlc, UpdateFailMalformedHtlc, UpdateFee, UpdateFulfillHtlc}
42+
import fr.acinq.eclair.wire.protocol.{AnnouncementSignatures, ChannelUpdate, ClosingSigned, CommitSig, Error, FailureMessageCodecs, PermanentChannelFailure, RevokeAndAck, Shutdown, TemporaryNodeFailure, UpdateAddHtlc, UpdateFailHtlc, UpdateFailMalformedHtlc, UpdateFee, UpdateFulfillHtlc}
4343
import org.scalatest.funsuite.FixtureAnyFunSuiteLike
4444
import org.scalatest.{Outcome, Tag}
4545
import scodec.bits._
@@ -1364,6 +1364,25 @@ class NormalStateSpec extends TestKitBaseClass with FixtureAnyFunSuiteLike with
13641364
assert(initialState == bob.stateData)
13651365
}
13661366

1367+
test("recv CMD_FAIL_HTLC (htlc pending fulfill)") { f =>
1368+
import f._
1369+
1370+
val sender = TestProbe()
1371+
val (r, htlc) = addHtlc(50000000 msat, alice, bob, alice2bob, bob2alice)
1372+
crossSign(alice, bob, alice2bob, bob2alice)
1373+
1374+
// HTLC is fulfilled but alice doesn't send its revocation.
1375+
bob ! CMD_FULFILL_HTLC(htlc.id, r)
1376+
bob ! CMD_SIGN()
1377+
bob2alice.expectMsgType[UpdateFulfillHtlc]
1378+
bob2alice.expectMsgType[CommitSig]
1379+
1380+
// We cannot fail the HTLC, we must wait for the fulfill to be acked.
1381+
val c = CMD_FAIL_HTLC(htlc.id, Right(TemporaryNodeFailure), replyTo_opt = Some(sender.ref))
1382+
bob ! c
1383+
sender.expectMsg(RES_FAILURE(c, UnknownHtlcId(channelId(bob), htlc.id)))
1384+
}
1385+
13671386
test("recv CMD_FAIL_HTLC (acknowledge in case of failure)") { f =>
13681387
import f._
13691388
val sender = TestProbe()

eclair-core/src/test/scala/fr/acinq/eclair/io/PeerSpec.scala

+17
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,23 @@ class PeerSpec extends TestKitBaseClass with FixtureAnyFunSuiteLike with Paralle
367367
assert(init.fundingAmount === 15000.sat)
368368
assert(init.pushAmount === 100.msat)
369369
}
370+
371+
test("handle final channelId assigned in state DISCONNECTED") { f =>
372+
import f._
373+
val probe = TestProbe()
374+
connect(remoteNodeId, peer, peerConnection, channels = Set(ChannelCodecsSpec.normal))
375+
peer ! ConnectionDown(peerConnection.ref)
376+
probe.send(peer, Peer.GetPeerInfo)
377+
val peerInfo1 = probe.expectMsgType[Peer.PeerInfo]
378+
assert(peerInfo1.state === "DISCONNECTED")
379+
assert(peerInfo1.channels === 1)
380+
peer ! ChannelIdAssigned(probe.ref, remoteNodeId, randomBytes32(), randomBytes32())
381+
probe.send(peer, Peer.GetPeerInfo)
382+
val peerInfo2 = probe.expectMsgType[Peer.PeerInfo]
383+
assert(peerInfo2.state === "DISCONNECTED")
384+
assert(peerInfo2.channels === 2)
385+
}
386+
370387
}
371388

372389
object PeerSpec {

0 commit comments

Comments
 (0)