argotorg · ekpyron · Mar 10, 2025 · Aug 27, 2024 · cameel · Mar 10, 2025
diff --git a/Changelog.md b/Changelog.md
@@ -6,6 +6,7 @@ Language Features:
 
 Compiler Features:
  * Error Reporting: Errors reported during code generation now point at the location of the contract when more fine-grained location is not available.
+ * ethdebug: Experimental support for instructions and source locations.
  * EVM: Support for the EVM version "Osaka".
  * EVM Assembly Import: Allow enabling opcode-based optimizer.
  * General: The experimental EOF backend implements a subset of EOF sufficient to compile arbitrary high-level Solidity syntax via IR with optimization enabled.

diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp
@@ -1281,6 +1281,21 @@ LinkerObject const& Assembly::assembleLegacy() const
 	uint8_t tagPush = static_cast<uint8_t>(pushInstruction(bytesPerTag));
 	uint8_t dataRefPush = static_cast<uint8_t>(pushInstruction(bytesPerDataRef));
 
+	LinkerObject::CodeSectionLocation codeSectionLocation;
+	codeSectionLocation.start = 0;
+	size_t assemblyItemIndex = 0;
+	auto assembleInstruction = [&](auto&& _addInstruction) {
+		size_t start = ret.bytecode.size();
+		_addInstruction();
+		size_t end = ret.bytecode.size();
+		codeSectionLocation.instructionLocations.emplace_back(
+			LinkerObject::InstructionLocation{
+				.start = start,
+				.end = end,
+				.assemblyItemIndex = assemblyItemIndex
+			}
+		);
+	};
-	auto assembleInstruction = [&](auto&& _addInstruction) {
-		size_t start = ret.bytecode.size();
-		_addInstruction();
-		size_t end = ret.bytecode.size();
-		codeSectionLocation.instructionLocations.emplace_back(
-			LinkerObject::InstructionLocation{
-				.start = start,
-				.end = end,
-				.assemblyItemIndex = assemblyItemIndex
-			}
-		);
-	};
+	auto assembleInstruction = [](LinkerObject const& _linkerObject, Instruction _opcode, bytes const& _immediates, size_t assemblyItemIndex) {
+		_linkerObject.bytecode += static_cast<uint8_t>(_opcode);
+		_linkerObject.bytecode += _immediates;
+		_linkerObject.codeSectionLocations[0].instructionLocations.push_back(
+			LinkerObject::InstructionLocation{
+				.start = ret.bytecode.size(),
+				.end = ret.bytecode.size() + 1 + _immediates.size(),
+				.assemblyItemIndex = assemblyItemIndex
+			}
+		);
+		_linkerObject.codeSectionLocations[0].end += 1 + _immediates.size();
+	};
-	auto assembleInstruction = [&](auto&& _addInstruction) {
-		size_t start = ret.bytecode.size();
-		_addInstruction();
-		size_t end = ret.bytecode.size();
-		codeSectionLocation.instructionLocations.emplace_back(
-			LinkerObject::InstructionLocation{
-				.start = start,
-				.end = end,
-				.assemblyItemIndex = assemblyItemIndex
-			}
-		);
-	};
+	auto assembleInstruction = [](LinkerObject const& _linkerObject, Instruction _opcode, bytes const& _immediates, size_t assemblyItemIndex) {
+		_linkerObject.bytecode += static_cast<uint8_t>(_opcode);
+		_linkerObject.bytecode += _immediates;
+		_linkerObject.codeSectionLocations[0].instructionLocations.push_back(
+			LinkerObject::InstructionLocation{
+				.start = ret.bytecode.size(),
+				.end = ret.bytecode.size() + 1 + _immediates.size(),
+				.assemblyItemIndex = assemblyItemIndex
+			}
+		);
+		_linkerObject.codeSectionLocations[0].end += 1 + _immediates.size();
+	};
 	for (AssemblyItem const& item: items)
 	{
 		// store position of the invalid jump destination
@@ -1290,63 +1305,81 @@ LinkerObject const& Assembly::assembleLegacy() const
 		switch (item.type())
 		{
 		case Operation:
-			ret.bytecode += assembleOperation(item);
+			assembleInstruction([&](){
+				ret.bytecode += assembleOperation(item);
+			});
 			break;
 		case Push:
-			ret.bytecode += assemblePush(item);
+			assembleInstruction([&](){
+				ret.bytecode += assemblePush(item);
+			});
 			break;
 		case PushTag:
 		{
-			ret.bytecode.push_back(tagPush);
-			tagRefs[ret.bytecode.size()] = item.splitForeignPushTag();
-			ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
+			assembleInstruction([&](){
+				ret.bytecode.push_back(tagPush);
+				tagRefs[ret.bytecode.size()] = item.splitForeignPushTag();
+				ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
+			});
 			break;
 		}
 		case PushData:
-			ret.bytecode.push_back(dataRefPush);
-			dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size()));
-			ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
+			assembleInstruction([&]() {
+				ret.bytecode.push_back(dataRefPush);
+				dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size()));
+				ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
+			});
 			break;
 		case PushSub:
-			assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
-			ret.bytecode.push_back(dataRefPush);
-			subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
-			ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
+			assembleInstruction([&]() {
+				assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
+				ret.bytecode.push_back(dataRefPush);
+				subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
+				ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
+			});
 			break;
 		case PushSubSize:
 		{
-			assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
-			auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
-			item.setPushedValue(u256(s));
-			unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
-			ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
-			ret.bytecode.resize(ret.bytecode.size() + b);
-			bytesRef byr(&ret.bytecode.back() + 1 - b, b);
-			toBigEndian(s, byr);
+			assembleInstruction([&](){
+				assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
+				auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
+				item.setPushedValue(u256(s));
+				unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
+				ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
+				ret.bytecode.resize(ret.bytecode.size() + b);
+				bytesRef byr(&ret.bytecode.back() + 1 - b, b);
+				toBigEndian(s, byr);
+			});
 			break;
 		}
 		case PushProgramSize:
 		{
-			ret.bytecode.push_back(dataRefPush);
-			sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size()));
-			ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
+			assembleInstruction([&](){
+				ret.bytecode.push_back(dataRefPush);
+				sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size()));
+				ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
+			});
 			break;
 		}
 		case PushLibraryAddress:
 		{
-			auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size());
-			ret.bytecode += bytecode;
-			ret.linkReferences.insert(linkRef);
+			assembleInstruction([&]() {
+				auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size());
+				ret.bytecode += bytecode;
+				ret.linkReferences.insert(linkRef);
+			});
 			break;
 		}
 		case PushImmutable:
-			ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32));
-			// Maps keccak back to the "identifier" std::string of that immutable.
-			ret.immutableReferences[item.data()].first = m_immutables.at(item.data());
-			// Record the bytecode offset of the PUSH32 argument.
-			ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size());
-			// Advance bytecode by 32 bytes (default initialized).
-			ret.bytecode.resize(ret.bytecode.size() + 32);
+			assembleInstruction([&]() {
+				ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32));
+				// Maps keccak back to the "identifier" std::string of that immutable.
+				ret.immutableReferences[item.data()].first = m_immutables.at(item.data());
+				// Record the bytecode offset of the PUSH32 argument.
+				ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size());
+				// Advance bytecode by 32 bytes (default initialized).
+				ret.bytecode.resize(ret.bytecode.size() + 32);
+			});
 			break;
 		case VerbatimBytecode:
 			ret.bytecode += assembleVerbatimBytecode(item);
@@ -1359,35 +1392,59 @@ LinkerObject const& Assembly::assembleLegacy() const
 			{
 				if (i != offsets.size() - 1)
 				{
-					ret.bytecode.push_back(uint8_t(Instruction::DUP2));
-					ret.bytecode.push_back(uint8_t(Instruction::DUP2));
+					assembleInstruction([&]() {
+						ret.bytecode.push_back(uint8_t(Instruction::DUP2));
+					});
+					assembleInstruction([&]() {
+						ret.bytecode.push_back(uint8_t(Instruction::DUP2));
+					});
 				}
-				// TODO: should we make use of the constant optimizer methods for pushing the offsets?
-				bytes offsetBytes = toCompactBigEndian(u256(offsets[i]));
-				ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size()))));
-				ret.bytecode += offsetBytes;
-				ret.bytecode.push_back(uint8_t(Instruction::ADD));
-				ret.bytecode.push_back(uint8_t(Instruction::MSTORE));
+				assembleInstruction([&]() {
+					// TODO: should we make use of the constant optimizer methods for pushing the offsets?
+					bytes offsetBytes = toCompactBigEndian(u256(offsets[i]));
+					ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size()))));
+					ret.bytecode += offsetBytes;
+				});
+				assembleInstruction([&]() {
+					ret.bytecode.push_back(uint8_t(Instruction::ADD));
+				});
+				assembleInstruction([&]() {
+					ret.bytecode.push_back(uint8_t(Instruction::MSTORE));
+				});
 			}
 			if (offsets.empty())
 			{
-				ret.bytecode.push_back(uint8_t(Instruction::POP));
-				ret.bytecode.push_back(uint8_t(Instruction::POP));
+				assembleInstruction([&]() {
+					ret.bytecode.push_back(uint8_t(Instruction::POP));
+				});
+				assembleInstruction([&]() {
+					ret.bytecode.push_back(uint8_t(Instruction::POP));
+				});
 			}
 			immutableReferencesBySub.erase(item.data());
 			break;
 		}
 		case PushDeployTimeAddress:
-			ret.bytecode += assemblePushDeployTimeAddress();
+			assembleInstruction([&]() {
+				ret.bytecode += assemblePushDeployTimeAddress();
+			});
 			break;
 		case Tag:
-			ret.bytecode += assembleTag(item, ret.bytecode.size(), true);
+			assembleInstruction([&](){
+				ret.bytecode += assembleTag(item, ret.bytecode.size(), true);
+			});
 			break;
 		default:
 			solAssert(false, "Unexpected opcode while assembling.");
 		}
+
+		++assemblyItemIndex;
 	}
 
+	codeSectionLocation.end = ret.bytecode.size();
+
+	ret.codeSectionLocations.emplace_back(std::move(codeSectionLocation));
+
 	if (!immutableReferencesBySub.empty())
 		throw
 			langutil::Error(

diff --git a/libevmasm/CMakeLists.txt b/libevmasm/CMakeLists.txt
@@ -4,6 +4,8 @@ set(sources
 	Assembly.h
 	AssemblyItem.cpp
 	AssemblyItem.h
+	Ethdebug.cpp
+	Ethdebug.h
 	EVMAssemblyStack.cpp
 	EVMAssemblyStack.h
 	BlockDeduplicator.cpp

diff --git a/libevmasm/Ethdebug.cpp b/libevmasm/Ethdebug.cpp
@@ -0,0 +1,115 @@
+/*
+	This file is part of solidity.
+
+	solidity is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	solidity is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with solidity.  If not, see <http://www.gnu.org/licenses/>.
+*/
+// SPDX-License-Identifier: GPL-3.0
+
+#include <libevmasm/Ethdebug.h>
+
+using namespace solidity;
+using namespace solidity::evmasm;
+using namespace solidity::evmasm::ethdebug;
+
+namespace
+{
+
+// Builds the "instructions" array of an ethdebug program: one entry per instruction location recorded in _linkerObject.
+Json programInstructions(Assembly const* _assembly, LinkerObject const& _linkerObject, unsigned _sourceId)
+{
+	// Interfaces, for example, have no valid assembly object; the feature checks only apply when there is one.
+	if (_assembly != nullptr)
+	{
+		solUnimplementedAssert(_assembly->eofVersion() == std::nullopt, "ethdebug does not yet support EOF.");
+		solUnimplementedAssert(_assembly->codeSections().size() == 1, "ethdebug does not yet support multiple code-sections.");
+		for (auto const& item: _assembly->codeSections()[0].items)
+			solUnimplementedAssert(item.type() != VerbatimBytecode, "Verbatim bytecode is currently not supported by ethdebug.");
+	}
+
+	solAssert(_linkerObject.codeSectionLocations.size() == 1);
+	bytes const& bytecode = _linkerObject.bytecode;
+	solAssert(_linkerObject.codeSectionLocations[0].end <= bytecode.size());
+	size_t constexpr opcodeSize = 1;
+	Json result = Json::array();
+	for (LinkerObject::InstructionLocation const& entry: _linkerObject.codeSectionLocations[0].instructionLocations)
+	{
+		// Instruction locations refer to assembly items, so an assembly must be present once there is at least one.
+		solAssert(_assembly);
+		solAssert(entry.end <= bytecode.size());
+		solAssert(entry.start < entry.end);
+		auto const& items = _assembly->codeSections().at(0).items;
+		solAssert(entry.assemblyItemIndex < items.size());
+		// The byte at entry.start is the opcode; any further bytes up to entry.end become a single hex-encoded argument.
+		Json operation = Json::object();
+		operation["mnemonic"] = instructionInfo(static_cast<Instruction>(bytecode[entry.start]), _assembly->evmVersion()).name;
+		if (entry.start + opcodeSize < entry.end)
+		{
+			bytes const immediates(
+				bytecode.begin() + static_cast<std::ptrdiff_t>(entry.start + opcodeSize),
+				bytecode.begin() + static_cast<std::ptrdiff_t>(entry.end)
+			);
+			solAssert(!immediates.empty());
+			operation["arguments"] = Json::array({util::toHex(immediates, util::HexPrefix::Add)});
+		}
+		langutil::SourceLocation const& location = items.at(entry.assemblyItemIndex).location();
+		Json code = Json::object();
+		code["source"] = Json::object();
+		code["source"]["id"] = static_cast<int>(_sourceId);
+		code["range"] = Json::object();
+		code["range"]["offset"] = location.start;
+		code["range"]["length"] = location.end - location.start;
+		Json instruction = Json::object();
+		instruction["offset"] = entry.start;
+		instruction["operation"] = operation;
+		instruction["context"] = Json::object();
+		instruction["context"]["code"] = code;
+		result.emplace_back(instruction);
+	}
+
+	return result;
+}
+
+} // anonymous namespace
+
+Json ethdebug::program(std::string_view _name, unsigned _sourceId, Assembly const* _assembly, LinkerObject const& _linkerObject)
+{
+	// A missing assembly is reported the same way as creation code; only a non-creation assembly yields "call".
+	bool const isCreation = !_assembly || _assembly->isCreation();
+	// Assigning through a not-yet-existing key turns the null value into an object, so nested keys can be set directly.
+	Json result = Json::object();
+	result["contract"]["name"] = _name;
+	result["contract"]["definition"]["source"]["id"] = _sourceId;
+	result["environment"] = isCreation ? "create" : "call";
+	result["instructions"] = programInstructions(_assembly, _linkerObject, _sourceId);
+	return result;
+}
+
+// Assembles the resources object: the compiler name and version plus the list of source paths with their ids.
+Json ethdebug::resources(std::vector<std::string> const& _sources, std::string const& _version)
+{
+	Json sourceList = Json::array();
+	for (std::string const& path: _sources)
+	{
+		// The id of a source is its index in _sources, i.e. the number of entries emitted so far.
+		Json entry = Json::object();
+		entry["id"] = sourceList.size();
+		entry["path"] = path;
+		sourceList.push_back(entry);
+	}
+	Json result = Json::object();
+	result["compilation"]["compiler"]["name"] = "solc";
+	result["compilation"]["compiler"]["version"] = _version;
+	result["compilation"]["sources"] = sourceList;
+	return result;
+}
diff --git a/libevmasm/Ethdebug.h b/libevmasm/Ethdebug.h
@@ -0,0 +1,35 @@
+/*
+	This file is part of solidity.
+
+	solidity is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	solidity is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with solidity.  If not, see <http://www.gnu.org/licenses/>.
+*/
+// SPDX-License-Identifier: GPL-3.0
+
+#pragma once
+
+#include <libsolutil/JSON.h>
+
+#include <libevmasm/Assembly.h>
+#include <libevmasm/LinkerObject.h>
+
+namespace solidity::evmasm::ethdebug
+{
+
+// Returns the ethdebug/format/program JSON for contract _name defined in source _sourceId; _assembly may be null (e.g. for interfaces).
+Json program(std::string_view _name, unsigned _sourceId, Assembly const* _assembly, LinkerObject const& _linkerObject);
+
+// Returns the ethdebug/format/info/resources JSON: compiler name "solc", _version, and _sources with each path's index as its id.
+Json resources(std::vector<std::string> const& _sources, std::string const& _version);
+
+} // namespace solidity::evmasm::ethdebug