-
Notifications
You must be signed in to change notification settings - Fork 6.1k
Add support for instructions and source ranges. #15368
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1281,6 +1281,21 @@ LinkerObject const& Assembly::assembleLegacy() const | |
| uint8_t tagPush = static_cast<uint8_t>(pushInstruction(bytesPerTag)); | ||
| uint8_t dataRefPush = static_cast<uint8_t>(pushInstruction(bytesPerDataRef)); | ||
|
|
||
| LinkerObject::CodeSectionLocation codeSectionLocation; | ||
| codeSectionLocation.start = 0; | ||
| size_t assemblyItemIndex = 0; | ||
| auto assembleInstruction = [&](auto&& _addInstruction) { | ||
| size_t start = ret.bytecode.size(); | ||
| _addInstruction(); | ||
| size_t end = ret.bytecode.size(); | ||
| codeSectionLocation.instructionLocations.emplace_back( | ||
| LinkerObject::InstructionLocation{ | ||
| .start = start, | ||
| .end = end, | ||
| .assemblyItemIndex = assemblyItemIndex | ||
| } | ||
| ); | ||
| }; | ||
| for (AssemblyItem const& item: items) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A more modern version of manually incrementing the instruction index would be to use Amounts to something like
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At first I changed that to
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be more readable if you made it side-effect free and instead explicitly passed |
||
| { | ||
| // store position of the invalid jump destination | ||
|
|
@@ -1290,63 +1305,81 @@ LinkerObject const& Assembly::assembleLegacy() const | |
| switch (item.type()) | ||
| { | ||
| case Operation: | ||
| ret.bytecode += assembleOperation(item); | ||
| assembleInstruction([&](){ | ||
| ret.bytecode += assembleOperation(item); | ||
| }); | ||
| break; | ||
| case Push: | ||
| ret.bytecode += assemblePush(item); | ||
| assembleInstruction([&](){ | ||
| ret.bytecode += assemblePush(item); | ||
| }); | ||
| break; | ||
| case PushTag: | ||
| { | ||
| ret.bytecode.push_back(tagPush); | ||
| tagRefs[ret.bytecode.size()] = item.splitForeignPushTag(); | ||
| ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); | ||
| assembleInstruction([&](){ | ||
| ret.bytecode.push_back(tagPush); | ||
| tagRefs[ret.bytecode.size()] = item.splitForeignPushTag(); | ||
| ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); | ||
| }); | ||
| break; | ||
| } | ||
| case PushData: | ||
| ret.bytecode.push_back(dataRefPush); | ||
| dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size())); | ||
| ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); | ||
| assembleInstruction([&]() { | ||
| ret.bytecode.push_back(dataRefPush); | ||
| dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size())); | ||
| ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); | ||
| }); | ||
| break; | ||
| case PushSub: | ||
| assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, ""); | ||
| ret.bytecode.push_back(dataRefPush); | ||
| subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size())); | ||
| ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); | ||
| assembleInstruction([&]() { | ||
| assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, ""); | ||
| ret.bytecode.push_back(dataRefPush); | ||
| subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size())); | ||
| ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); | ||
| }); | ||
| break; | ||
| case PushSubSize: | ||
| { | ||
| assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, ""); | ||
| auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size(); | ||
| item.setPushedValue(u256(s)); | ||
| unsigned b = std::max<unsigned>(1, numberEncodingSize(s)); | ||
| ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b))); | ||
| ret.bytecode.resize(ret.bytecode.size() + b); | ||
| bytesRef byr(&ret.bytecode.back() + 1 - b, b); | ||
| toBigEndian(s, byr); | ||
| assembleInstruction([&](){ | ||
| assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, ""); | ||
| auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size(); | ||
| item.setPushedValue(u256(s)); | ||
| unsigned b = std::max<unsigned>(1, numberEncodingSize(s)); | ||
| ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b))); | ||
| ret.bytecode.resize(ret.bytecode.size() + b); | ||
| bytesRef byr(&ret.bytecode.back() + 1 - b, b); | ||
| toBigEndian(s, byr); | ||
| }); | ||
| break; | ||
| } | ||
| case PushProgramSize: | ||
| { | ||
| ret.bytecode.push_back(dataRefPush); | ||
| sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size())); | ||
| ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); | ||
| assembleInstruction([&](){ | ||
| ret.bytecode.push_back(dataRefPush); | ||
| sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size())); | ||
| ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); | ||
| }); | ||
| break; | ||
| } | ||
| case PushLibraryAddress: | ||
| { | ||
| auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size()); | ||
| ret.bytecode += bytecode; | ||
| ret.linkReferences.insert(linkRef); | ||
| assembleInstruction([&]() { | ||
| auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size()); | ||
| ret.bytecode += bytecode; | ||
| ret.linkReferences.insert(linkRef); | ||
| }); | ||
| break; | ||
| } | ||
| case PushImmutable: | ||
| ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32)); | ||
| // Maps keccak back to the "identifier" std::string of that immutable. | ||
| ret.immutableReferences[item.data()].first = m_immutables.at(item.data()); | ||
| // Record the bytecode offset of the PUSH32 argument. | ||
| ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size()); | ||
| // Advance bytecode by 32 bytes (default initialized). | ||
| ret.bytecode.resize(ret.bytecode.size() + 32); | ||
| assembleInstruction([&]() { | ||
| ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32)); | ||
| // Maps keccak back to the "identifier" std::string of that immutable. | ||
| ret.immutableReferences[item.data()].first = m_immutables.at(item.data()); | ||
| // Record the bytecode offset of the PUSH32 argument. | ||
| ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size()); | ||
| // Advance bytecode by 32 bytes (default initialized). | ||
| ret.bytecode.resize(ret.bytecode.size() + 32); | ||
| }); | ||
| break; | ||
| case VerbatimBytecode: | ||
| ret.bytecode += assembleVerbatimBytecode(item); | ||
|
|
@@ -1359,35 +1392,59 @@ LinkerObject const& Assembly::assembleLegacy() const | |
| { | ||
| if (i != offsets.size() - 1) | ||
| { | ||
| ret.bytecode.push_back(uint8_t(Instruction::DUP2)); | ||
| ret.bytecode.push_back(uint8_t(Instruction::DUP2)); | ||
| assembleInstruction([&]() { | ||
| ret.bytecode.push_back(uint8_t(Instruction::DUP2)); | ||
| }); | ||
| assembleInstruction([&]() { | ||
| ret.bytecode.push_back(uint8_t(Instruction::DUP2)); | ||
| }); | ||
| } | ||
| // TODO: should we make use of the constant optimizer methods for pushing the offsets? | ||
| bytes offsetBytes = toCompactBigEndian(u256(offsets[i])); | ||
| ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size())))); | ||
| ret.bytecode += offsetBytes; | ||
| ret.bytecode.push_back(uint8_t(Instruction::ADD)); | ||
| ret.bytecode.push_back(uint8_t(Instruction::MSTORE)); | ||
| assembleInstruction([&]() { | ||
| // TODO: should we make use of the constant optimizer methods for pushing the offsets? | ||
| bytes offsetBytes = toCompactBigEndian(u256(offsets[i])); | ||
| ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size())))); | ||
| ret.bytecode += offsetBytes; | ||
| }); | ||
| assembleInstruction([&]() { | ||
| ret.bytecode.push_back(uint8_t(Instruction::ADD)); | ||
| }); | ||
| assembleInstruction([&]() { | ||
| ret.bytecode.push_back(uint8_t(Instruction::MSTORE)); | ||
| }); | ||
| } | ||
| if (offsets.empty()) | ||
| { | ||
| ret.bytecode.push_back(uint8_t(Instruction::POP)); | ||
| ret.bytecode.push_back(uint8_t(Instruction::POP)); | ||
| assembleInstruction([&]() { | ||
| ret.bytecode.push_back(uint8_t(Instruction::POP)); | ||
| }); | ||
| assembleInstruction([&]() { | ||
| ret.bytecode.push_back(uint8_t(Instruction::POP)); | ||
| }); | ||
| } | ||
| immutableReferencesBySub.erase(item.data()); | ||
| break; | ||
| } | ||
| case PushDeployTimeAddress: | ||
| ret.bytecode += assemblePushDeployTimeAddress(); | ||
| assembleInstruction([&]() { | ||
| ret.bytecode += assemblePushDeployTimeAddress(); | ||
| }); | ||
| break; | ||
| case Tag: | ||
| ret.bytecode += assembleTag(item, ret.bytecode.size(), true); | ||
| assembleInstruction([&](){ | ||
| ret.bytecode += assembleTag(item, ret.bytecode.size(), true); | ||
| }); | ||
| break; | ||
| default: | ||
| solAssert(false, "Unexpected opcode while assembling."); | ||
| } | ||
|
|
||
| ++assemblyItemIndex; | ||
| } | ||
|
|
||
| codeSectionLocation.end = ret.bytecode.size(); | ||
|
|
||
| ret.codeSectionLocations.emplace_back(std::move(codeSectionLocation)); | ||
|
|
||
| if (!immutableReferencesBySub.empty()) | ||
| throw | ||
| langutil::Error( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,115 @@ | ||
| /* | ||
| This file is part of solidity. | ||
|
|
||
| solidity is free software: you can redistribute it and/or modify | ||
| it under the terms of the GNU General Public License as published by | ||
| the Free Software Foundation, either version 3 of the License, or | ||
| (at your option) any later version. | ||
|
|
||
| solidity is distributed in the hope that it will be useful, | ||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| GNU General Public License for more details. | ||
|
|
||
| You should have received a copy of the GNU General Public License | ||
| along with solidity. If not, see <http://www.gnu.org/licenses/>. | ||
| */ | ||
| // SPDX-License-Identifier: GPL-3.0 | ||
|
|
||
| #include <libevmasm/Ethdebug.h> | ||
clonker marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| using namespace solidity; | ||
| using namespace solidity::evmasm; | ||
| using namespace solidity::evmasm::ethdebug; | ||
|
|
||
| namespace | ||
| { | ||
|
|
||
| Json programInstructions(Assembly const* _assembly, LinkerObject const& _linkerObject, unsigned _sourceId) | ||
| { | ||
| // e.g. interfaces don't have a valid assembly object. | ||
| if (_assembly) | ||
| { | ||
| solUnimplementedAssert(_assembly->eofVersion() == std::nullopt, "ethdebug does not yet support EOF."); | ||
| solUnimplementedAssert(_assembly->codeSections().size() == 1, "ethdebug does not yet support multiple code-sections."); | ||
| for (auto const& instruction: _assembly->codeSections()[0].items) | ||
| solUnimplementedAssert(instruction.type() != VerbatimBytecode, "Verbatim bytecode is currently not supported by ethdebug."); | ||
| } | ||
|
|
||
| solAssert(_linkerObject.codeSectionLocations.size() == 1); | ||
| solAssert(_linkerObject.codeSectionLocations[0].end <= _linkerObject.bytecode.size()); | ||
| Json instructions = Json::array(); | ||
| for (size_t i = 0; i < _linkerObject.codeSectionLocations[0].instructionLocations.size(); ++i) | ||
| { | ||
| solAssert(_assembly); | ||
| LinkerObject::InstructionLocation currentInstruction = _linkerObject.codeSectionLocations[0].instructionLocations[i]; | ||
| size_t start = currentInstruction.start; | ||
| size_t end = currentInstruction.end; | ||
| size_t assemblyItemIndex = currentInstruction.assemblyItemIndex; | ||
| solAssert(end <= _linkerObject.bytecode.size()); | ||
| solAssert(start < end); | ||
| solAssert(assemblyItemIndex < _assembly->codeSections().at(0).items.size()); | ||
| Json operation = Json::object(); | ||
| operation["mnemonic"] = instructionInfo(static_cast<Instruction>(_linkerObject.bytecode[start]), _assembly->evmVersion()).name; | ||
| static size_t constexpr instructionSize = 1; | ||
| if (start + instructionSize < end) | ||
| { | ||
| bytes const argumentData( | ||
| _linkerObject.bytecode.begin() + static_cast<std::ptrdiff_t>(start) + instructionSize, | ||
| _linkerObject.bytecode.begin() + static_cast<std::ptrdiff_t>(end) | ||
| ); | ||
| solAssert(!argumentData.empty()); | ||
| operation["arguments"] = Json::array({util::toHex(argumentData, util::HexPrefix::Add)}); | ||
| } | ||
| langutil::SourceLocation const& location = _assembly->codeSections().at(0).items.at(assemblyItemIndex).location(); | ||
| Json instruction = Json::object(); | ||
| instruction["offset"] = start; | ||
| instruction["operation"] = operation; | ||
|
|
||
| instruction["context"] = Json::object(); | ||
| instruction["context"]["code"] = Json::object(); | ||
| instruction["context"]["code"]["source"] = Json::object(); | ||
| instruction["context"]["code"]["source"]["id"] = static_cast<int>(_sourceId); | ||
|
|
||
| instruction["context"]["code"]["range"] = Json::object(); | ||
| instruction["context"]["code"]["range"]["offset"] = location.start; | ||
| instruction["context"]["code"]["range"]["length"] = location.end - location.start; | ||
| instructions.emplace_back(instruction); | ||
| } | ||
|
|
||
| return instructions; | ||
| } | ||
|
|
||
| } // anonymous namespace | ||
|
|
||
| Json ethdebug::program(std::string_view _name, unsigned _sourceId, Assembly const* _assembly, LinkerObject const& _linkerObject) | ||
| { | ||
| Json result = Json::object(); | ||
| result["contract"] = Json::object(); | ||
| result["contract"]["name"] = _name; | ||
| result["contract"]["definition"] = Json::object(); | ||
| result["contract"]["definition"]["source"] = Json::object(); | ||
| result["contract"]["definition"]["source"]["id"] = _sourceId; | ||
| result["environment"] = (!_assembly || _assembly->isCreation()) ? "create" : "call"; | ||
| result["instructions"] = programInstructions(_assembly, _linkerObject, _sourceId); | ||
| return result; | ||
clonker marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| Json ethdebug::resources(std::vector<std::string> const& _sources, std::string const& _version) | ||
| { | ||
| Json sources = Json::array(); | ||
| for (size_t id = 0; id < _sources.size(); ++id) | ||
| { | ||
| Json source = Json::object(); | ||
| source["id"] = id; | ||
| source["path"] = _sources[id]; | ||
| sources.push_back(source); | ||
| } | ||
| Json result = Json::object(); | ||
| result["compilation"] = Json::object(); | ||
| result["compilation"]["compiler"] = Json::object(); | ||
| result["compilation"]["compiler"]["name"] = "solc"; | ||
| result["compilation"]["compiler"]["version"] = _version; | ||
| result["compilation"]["sources"] = sources; | ||
| return result; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| /* | ||
| This file is part of solidity. | ||
|
|
||
| solidity is free software: you can redistribute it and/or modify | ||
| it under the terms of the GNU General Public License as published by | ||
| the Free Software Foundation, either version 3 of the License, or | ||
| (at your option) any later version. | ||
|
|
||
| solidity is distributed in the hope that it will be useful, | ||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| GNU General Public License for more details. | ||
|
|
||
| You should have received a copy of the GNU General Public License | ||
| along with solidity. If not, see <http://www.gnu.org/licenses/>. | ||
| */ | ||
| // SPDX-License-Identifier: GPL-3.0 | ||
|
|
||
| #pragma once | ||
clonker marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| #include <libsolutil/JSON.h> | ||
|
|
||
| #include <libevmasm/Assembly.h> | ||
| #include <libevmasm/LinkerObject.h> | ||
|
|
||
| namespace solidity::evmasm::ethdebug | ||
| { | ||
|
|
||
| // returns ethdebug/format/program. | ||
| Json program(std::string_view _name, unsigned _sourceId, Assembly const* _assembly, LinkerObject const& _linkerObject); | ||
|
|
||
| // returns ethdebug/format/info/resources | ||
| Json resources(std::vector<std::string> const& _sources, std::string const& _version); | ||
|
|
||
| } // namespace solidity::evmasm::ethdebug | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Regarding the helper, I had something like this in mind:
I.e. not depending on any external state, just arguments and clearly separating the bytecode from other stuff.
I'd not even make it a lambda - we'll need it in
assembleEOF() later, so it should be a normal function, maybe even a class member. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd even consider turning it into a class doing what it does currently as side-effect :-). But I'd say we can also wait until we inevitably port it to
assembleEOF to tidy it up. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well, it would be good to at least make it take bytecode, not a callback.
But yeah, it's not like it's a blocker here. It would be very nice to have, but I approved already because at this point these are not things critical to the functionality.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But yeah, for the record: this is how I'd have done this: develop...ethdebug_instructions_and_source_ranges_refactor
Which includes doing it for EOF (which is just two lines in that version).
Just to keep anything ethdebug-related that's non-critical as separate from the rest as possible (I'd move the helper classes out of the file).
The only ugliness there is the
AssignImmutable case, which unfortunately is irregular compared to all other cases, but it concentrates that ugliness there. But yeah, we can see if we do something like that or some other solution after the release, maybe there's something even better.